blob: 98e13abf4ece87b48606c49c0934af9d63a8aef2 [file] [log] [blame]
drha5d14fe2004-05-04 15:00:46 +00001/*
2** 2004 April 13
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12** This file contains routines used to translate between UTF-8,
13** UTF-16, UTF-16BE, and UTF-16LE.
14**
danielk1977bfd6cce2004-06-18 04:24:54 +000015** $Id: utf.c,v 1.21 2004/06/18 04:24:55 danielk1977 Exp $
drha5d14fe2004-05-04 15:00:46 +000016**
17** Notes on UTF-8:
18**
19** Byte-0 Byte-1 Byte-2 Byte-3 Value
20** 0xxxxxxx 00000000 00000000 0xxxxxxx
21** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx
22** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx
23** 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx
24**
25**
26** Notes on UTF-16: (with wwww+1==uuuuu)
27**
drh51846b52004-05-28 16:00:21 +000028** Word-0 Word-1 Value
29** 110110ww wwzzzzyy 110111yy yyxxxxxx 000uuuuu zzzzyyyy yyxxxxxx
30** zzzzyyyy yyxxxxxx 00000000 zzzzyyyy yyxxxxxx
drha5d14fe2004-05-04 15:00:46 +000031**
danielk1977998b56c2004-05-06 23:37:52 +000032**
drha5d14fe2004-05-04 15:00:46 +000033** BOM or Byte Order Mark:
34** 0xff 0xfe little-endian utf-16 follows
35** 0xfe 0xff big-endian utf-16 follows
danielk1977998b56c2004-05-06 23:37:52 +000036**
37**
38** Handling of malformed strings:
39**
40** SQLite accepts and processes malformed strings without an error wherever
41** possible. However this is not possible when converting between UTF-8 and
42** UTF-16.
43**
44** When converting malformed UTF-8 strings to UTF-16, one instance of the
45** replacement character U+FFFD is output for each byte that cannot be
46** interpreted as part of a valid unicode character.
47**
48** When converting malformed UTF-16 strings to UTF-8, one instance of the
49** replacement character U+FFFD is output for each pair of bytes that cannot
50** be interpreted as part of a valid unicode character.
danielk1977bfd6cce2004-06-18 04:24:54 +000051**
52** This file contains the following public routines:
53**
54** sqlite3VdbeMemTranslate() - Translate the encoding used by a Mem* string.
55** sqlite3VdbeMemHandleBom() - Handle byte-order-marks in UTF16 Mem* strings.
56** sqlite3utf16ByteLen() - Calculate byte-length of a void* UTF16 string.
57** sqlite3utf8CharLen() - Calculate char-length of a char* UTF8 string.
58** sqlite3utf8LikeCompare() - Do a LIKE match given two UTF8 char* strings.
59**
drha5d14fe2004-05-04 15:00:46 +000060*/
danielk1977998b56c2004-05-06 23:37:52 +000061#include <assert.h>
danielk1977998b56c2004-05-06 23:37:52 +000062#include "sqliteInt.h"
danielk1977bfd6cce2004-06-18 04:24:54 +000063#include "vdbeInt.h"
danielk1977998b56c2004-05-06 23:37:52 +000064
/*
** LOWERCASE(x) takes an integer representing a unicode code point and
** evaluates to the same code point folded to lower case, if applicable.
** Only the 26 letters of the English alphabet are understood: values
** below 91 are looked up in UpperToLower[] (which maps 'A'..'Z' onto
** 'a'..'z' and every other entry onto itself), all other values are
** passed through unchanged.
**
** This means that characters with umlauts etc. will not be folded
** correctly (unless they are encoded as composite characters, which would
** doubtless cause much trouble).
**
** The argument is parenthesized at every use so that compound expressions
** expand as intended.  It is still evaluated more than once, so it must
** not have side effects, and it must not be negative because it is used
** as an array index.
*/
#define LOWERCASE(x) (((x)<91)?(int)(UpperToLower[(x)]):(x))
static const unsigned char UpperToLower[91] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122,
};
85
86/*
danielk1977bfd6cce2004-06-18 04:24:54 +000087** This table maps from the first byte of a UTF-8 character to the number
88** of trailing bytes expected. A value '255' indicates that the table key
89** is not a legal first byte for a UTF-8 character.
danielk1977d02eb1f2004-06-06 09:44:03 +000090*/
danielk1977bfd6cce2004-06-18 04:24:54 +000091static const u8 xtra_utf8_bytes[256] = {
92/* 0xxxxxxx */
930, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
940, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
950, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
960, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
970, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
980, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
990, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
danielk1977d02eb1f2004-06-06 09:44:03 +0000101
danielk1977bfd6cce2004-06-18 04:24:54 +0000102/* 10wwwwww */
103255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
104255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
105255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
106255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
danielk1977ad7dd422004-06-06 12:41:49 +0000107
danielk1977bfd6cce2004-06-18 04:24:54 +0000108/* 110yyyyy */
1091, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1101, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111
112/* 1110zzzz */
1132, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
115/* 11110yyy */
1163, 3, 3, 3, 3, 3, 3, 3, 255, 255, 255, 255, 255, 255, 255, 255,
117};
118
/*
** Lookup table indexed by the number of trailing bytes (0..3) in a UTF-8
** character.  Each entry is the sum of the marker bits of the first byte
** and of every trailing byte, positioned where a naive reader leaves them
** after shifting each byte in six bits at a time.  The READ_UTF8 macro
** subtracts the entry from its accumulated value to obtain the code point.
*/
static const int xtra_utf8_bits[4] = {
  0,                                              /* single byte: nothing */
  (0xC0<<6) + 0x80,                               /* 12416    */
  (0xE0<<12) + (0x80<<6) + 0x80,                  /* 925824   */
  (0xF0<<18) + (0x80<<12) + (0x80<<6) + 0x80      /* 63447168 */
};
131
/*
** Read one UTF-8 character starting at zIn, store its code point in c and
** advance zIn past the bytes that were consumed.
**
** The first byte selects the number of trailing bytes via
** xtra_utf8_bytes[].  A byte that is not a legal first byte produces the
** replacement character U+FFFD and consumes only that byte.  Otherwise
** every byte is shifted into c with its marker bits still attached, and
** the accumulated marker bits are removed in one step by subtracting
** xtra_utf8_bits[xtra].
**
** Each byte is converted to unsigned char before use so that the table
** index can never be negative, even when zIn points at plain char.
** Trailing bytes are neither validated nor bounds-checked: the caller must
** make sure the whole character is readable.  Both zIn and c are evaluated
** several times and must be side-effect-free lvalues.
**
** The body is wrapped in do{}while(0) so the macro acts as one statement,
** including inside an unbraced if/else.
*/
#define READ_UTF8(zIn, c) do {                                         \
  int xtra;                                                            \
  (c) = (unsigned char)*(zIn)++;                                       \
  xtra = xtra_utf8_bytes[(c)];                                         \
  switch( xtra ){                                                      \
    case 255: (c) = (int)0xFFFD; break;                                \
    case 3: (c) = ((c)<<6) + (unsigned char)*(zIn)++; /* fall through */ \
    case 2: (c) = ((c)<<6) + (unsigned char)*(zIn)++; /* fall through */ \
    case 1: (c) = ((c)<<6) + (unsigned char)*(zIn)++;                  \
            (c) -= xtra_utf8_bits[xtra];                               \
  }                                                                    \
} while( 0 )
144
/*
** Advance zIn past the UTF-8 character it points at, using the first byte
** and xtra_utf8_bytes[] to determine the character's length.
**
** The table uses 255 as a marker for "not a legal first byte", not as a
** byte count, so such a byte is stepped over on its own.  Adding the
** marker to the pointer would move it 256 bytes forward.
**
** Trailing bytes are not inspected: a truncated character at the end of a
** buffer still advances zIn by the full expected length, so callers must
** compare against their own end pointer afterwards.  zIn is evaluated more
** than once.
*/
#define SKIP_UTF8(zIn) do {                                            \
  int xtraSkip = xtra_utf8_bytes[*(const unsigned char *)(zIn)];       \
  (zIn) += (xtraSkip==255 ? 1 : xtraSkip + 1);                         \
} while( 0 )
148
/*
** Append code point c to the byte buffer at zOut in UTF-8 and advance zOut
** past the 1 to 4 bytes written:
**
**     c < 0x80      1 byte
**     c < 0x800     2 bytes
**     c < 0x10000   3 bytes
**     otherwise     4 bytes
**
** No range or surrogate checking is done on c, and the caller must provide
** enough room at zOut.  Both arguments are parenthesized at every use but
** are evaluated several times, so they must be free of side effects.  The
** do{}while(0) wrapper lets the macro be used as a single statement.
*/
#define WRITE_UTF8(zOut, c) do {                       \
  if( (c)<0x00080 ){                                   \
    *(zOut)++ = ((c)&0xFF);                            \
  }else if( (c)<0x00800 ){                             \
    *(zOut)++ = 0xC0 + (((c)>>6)&0x1F);                \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }else if( (c)<0x10000 ){                             \
    *(zOut)++ = 0xE0 + (((c)>>12)&0x0F);               \
    *(zOut)++ = 0x80 + (((c)>>6) & 0x3F);              \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }else{                                               \
    *(zOut)++ = 0xF0 + (((c)>>18) & 0x07);             \
    *(zOut)++ = 0x80 + (((c)>>12) & 0x3F);             \
    *(zOut)++ = 0x80 + (((c)>>6) & 0x3F);              \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }                                                    \
} while( 0 )
168
/*
** Append code point c to the byte buffer at zOut as little-endian UTF-16
** and advance zOut.  Values up to 0xFFFF are written as one 16-bit word
** (low byte first); larger values are written as a surrogate pair, four
** bytes in total, using the layout
**
**     Word-0: 110110ww wwzzzzyy     Word-1: 110111yy yyxxxxxx
**
** with wwww+1==uuuuu for the code point 000uuuuu zzzzyyyy yyxxxxxx.
**
** Both arguments are parenthesized at every use but are evaluated several
** times, so they must be free of side effects.  The do{}while(0) wrapper
** lets the macro be used as a single statement.
*/
#define WRITE_UTF16LE(zOut, c) do {                                      \
  if( (c)<=0xFFFF ){                                                     \
    *(zOut)++ = ((c)&0x00FF);                                            \
    *(zOut)++ = (((c)>>8)&0x00FF);                                       \
  }else{                                                                 \
    *(zOut)++ = ((((c)>>10)&0x003F) + ((((c)-0x10000)>>10)&0x00C0));     \
    *(zOut)++ = (0x00D8 + ((((c)-0x10000)>>18)&0x03));                   \
    *(zOut)++ = ((c)&0x00FF);                                            \
    *(zOut)++ = (0x00DC + (((c)>>8)&0x03));                              \
  }                                                                      \
} while( 0 )
180
/*
** Append code point c to the byte buffer at zOut as big-endian UTF-16 and
** advance zOut.  Values up to 0xFFFF are written as one 16-bit word (high
** byte first); larger values are written as a surrogate pair, four bytes
** in total, using the layout
**
**     Word-0: 110110ww wwzzzzyy     Word-1: 110111yy yyxxxxxx
**
** with wwww+1==uuuuu for the code point 000uuuuu zzzzyyyy yyxxxxxx.
**
** Both arguments are parenthesized at every use but are evaluated several
** times, so they must be free of side effects.  The do{}while(0) wrapper
** lets the macro be used as a single statement.
*/
#define WRITE_UTF16BE(zOut, c) do {                                      \
  if( (c)<=0xFFFF ){                                                     \
    *(zOut)++ = (((c)>>8)&0x00FF);                                       \
    *(zOut)++ = ((c)&0x00FF);                                            \
  }else{                                                                 \
    *(zOut)++ = (0x00D8 + ((((c)-0x10000)>>18)&0x03));                   \
    *(zOut)++ = ((((c)>>10)&0x003F) + ((((c)-0x10000)>>10)&0x00C0));     \
    *(zOut)++ = (0x00DC + (((c)>>8)&0x03));                              \
    *(zOut)++ = ((c)&0x00FF);                                            \
  }                                                                      \
} while( 0 )
192
/*
** Read one character from the little-endian UTF-16 bytes at zIn, store its
** code point in c and advance zIn past the 2 or 4 bytes consumed.
**
** A first word in the surrogate range 0xD800..0xDFFF is combined with the
** following word into a code point above 0xFFFF.  The upper bound is
** exclusive: 0xE000 is an ordinary single-word character and must not
** swallow the word that follows it.  The second word is not validated and
** nothing is bounds-checked; the caller must make sure the bytes are
** readable.
**
** Each byte is converted to unsigned char before use so the macro decodes
** correctly even when zIn points at plain (possibly signed) char.  zIn and
** c are evaluated several times.  The do{}while(0) wrapper lets the macro
** be used as a single statement.
*/
#define READ_UTF16LE(zIn, c) do {                                          \
  (c) = (unsigned char)*(zIn)++;                                           \
  (c) += ((unsigned char)*(zIn)++)<<8;                                     \
  if( (c)>=0xD800 && (c)<0xE000 ){                                         \
    int c2 = (unsigned char)*(zIn)++;                                      \
    c2 += ((unsigned char)*(zIn)++)<<8;                                    \
    (c) = (c2&0x03FF) + (((c)&0x003F)<<10) + ((((c)&0x03C0)+0x0040)<<10);  \
  }                                                                        \
} while( 0 )
202
/*
** Read one character from the big-endian UTF-16 bytes at zIn, store its
** code point in c and advance zIn past the 2 or 4 bytes consumed.
**
** A first word in the surrogate range 0xD800..0xDFFF is combined with the
** following word into a code point above 0xFFFF.  The upper bound is
** exclusive: 0xE000 is an ordinary single-word character and must not
** swallow the word that follows it.  The second word is not validated and
** nothing is bounds-checked; the caller must make sure the bytes are
** readable.
**
** Each byte is converted to unsigned char before use so the macro decodes
** correctly even when zIn points at plain (possibly signed) char.  zIn and
** c are evaluated several times.  The do{}while(0) wrapper lets the macro
** be used as a single statement.
*/
#define READ_UTF16BE(zIn, c) do {                                          \
  (c) = ((unsigned char)*(zIn)++)<<8;                                      \
  (c) += (unsigned char)*(zIn)++;                                          \
  if( (c)>=0xD800 && (c)<0xE000 ){                                         \
    int c2 = ((unsigned char)*(zIn)++)<<8;                                 \
    c2 += (unsigned char)*(zIn)++;                                         \
    (c) = (c2&0x03FF) + (((c)&0x003F)<<10) + ((((c)&0x03C0)+0x0040)<<10);  \
  }                                                                        \
} while( 0 )
212
213/*
214** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
215** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
216*/
217/* #define TRANSLATE_TRACE 1 */
218
219/*
220** This routine transforms the internal text encoding used by pMem to
221** desiredEnc. It is an error if the string is already of the desired
222** encoding, or if *pMem does not contain a string value.
223*/
224int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
225 unsigned char zShort[NBFS]; /* Temporary short output buffer */
226 int len; /* Maximum length of output string in bytes */
227 unsigned char *zOut; /* Output buffer */
228 unsigned char *zIn; /* Input iterator */
229 unsigned char *zTerm; /* End of input */
230 unsigned char *z; /* Output iterator */
231 int c;
232
233 assert( pMem->flags&MEM_Str );
234 assert( pMem->enc!=desiredEnc );
235 assert( pMem->enc!=0 );
236 assert( pMem->n>=0 );
237
238#ifdef TRANSLATE_TRACE
239 {
240 char zBuf[100];
241 sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
242 fprintf(stderr, "INPUT: %s\n", zBuf);
danielk1977ad7dd422004-06-06 12:41:49 +0000243 }
244#endif
245
danielk1977bfd6cce2004-06-18 04:24:54 +0000246 /* If the translation is between UTF-16 little and big endian, then
247 ** all that is required is to swap the byte order. This case is handled
248 ** differently from the others.
danielk1977998b56c2004-05-06 23:37:52 +0000249 */
danielk1977bfd6cce2004-06-18 04:24:54 +0000250 if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
251 u8 temp;
252 sqlite3VdbeMemMakeWriteable(pMem);
253 zIn = pMem->z;
254 zTerm = &zIn[pMem->n];
255 while( zIn<zTerm ){
256 temp = *zIn;
257 *zIn = *(zIn+1);
258 zIn++;
259 *zIn++ = temp;
260 }
261 pMem->enc = desiredEnc;
262 goto translate_out;
263 }
264
265 /* Set zIn to point at the start of the input buffer and zTerm to point 1
266 ** byte past the end.
267 **
268 ** Variable zOut is set to point at the output buffer. This may be space
269 ** obtained from malloc(), or Mem.zShort, if it large enough and not in
270 ** use, or the zShort array on the stack (see above).
271 */
272 zIn = pMem->z;
273 zTerm = &zIn[pMem->n];
274 len = pMem->n*2 + 2;
275 if( len>NBFS ){
276 zOut = sqliteMallocRaw(len);
277 if( !zOut ) return SQLITE_NOMEM;
278 }else{
279 if( pMem->z==pMem->zShort ){
280 zOut = zShort;
281 }else{
282 zOut = pMem->zShort;
283 }
284 }
285 z = zOut;
286
287 if( pMem->enc==SQLITE_UTF8 ){
288 if( desiredEnc==SQLITE_UTF16LE ){
289 /* UTF-8 -> UTF-16 Little-endian */
290 while( zIn<zTerm ){
291 READ_UTF8(zIn, c);
292 WRITE_UTF16LE(z, c);
293 }
294 WRITE_UTF16LE(z, 0);
295 pMem->n = (z-zOut)-2;
296 }else if( desiredEnc==SQLITE_UTF16BE ){
297 /* UTF-8 -> UTF-16 Big-endian */
298 while( zIn<zTerm ){
299 READ_UTF8(zIn, c);
300 WRITE_UTF16BE(z, c);
301 }
302 WRITE_UTF16BE(z, 0);
303 pMem->n = (z-zOut)-2;
304 }
305 }else{
306 assert( desiredEnc==SQLITE_UTF8 );
307 if( pMem->enc==SQLITE_UTF16LE ){
308 /* UTF-16 Little-endian -> UTF-8 */
309 while( zIn<zTerm ){
310 READ_UTF16LE(zIn, c);
311 WRITE_UTF8(z, c);
312 }
313 WRITE_UTF8(z, 0);
314 pMem->n = (z-zOut)-1;
315 }else{
316 /* UTF-16 Little-endian -> UTF-8 */
317 while( zIn<zTerm ){
318 READ_UTF16BE(zIn, c);
319 WRITE_UTF8(z, c);
320 }
321 WRITE_UTF8(z, 0);
322 pMem->n = (z-zOut)-1;
danielk1977998b56c2004-05-06 23:37:52 +0000323 }
324 }
325
danielk1977bfd6cce2004-06-18 04:24:54 +0000326 sqlite3VdbeMemRelease(pMem);
327 pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
328 pMem->enc = desiredEnc;
329 if( (char *)zOut==pMem->zShort ){
330 pMem->flags |= (MEM_Term|MEM_Short);
331 }else if( zOut==zShort ){
332 memcpy(pMem->zShort, zOut, len);
333 zOut = pMem->zShort;
334 pMem->flags |= (MEM_Term|MEM_Short);
335 }else{
336 pMem->flags |= (MEM_Term|MEM_Dyn);
337 }
338 pMem->z = zOut;
339
340translate_out:
341#ifdef TRANSLATE_TRACE
342 {
343 char zBuf[100];
344 sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
345 fprintf(stderr, "OUTPUT: %s\n", zBuf);
346 }
347#endif
348 return SQLITE_OK;
danielk1977998b56c2004-05-06 23:37:52 +0000349}
350
danielk197793d46752004-05-23 13:30:58 +0000351/*
danielk1977bfd6cce2004-06-18 04:24:54 +0000352** This routine checks for a byte-order mark at the beginning of the
353** UTF-16 string stored in *pMem. If one is present, it is removed and
354** the encoding of the Mem adjusted. This routine does not do any
355** byte-swapping, it just sets Mem.enc appropriately.
356**
357** The allocation (static, dynamic etc.) and encoding of the Mem may be
358** changed by this function.
danielk197793d46752004-05-23 13:30:58 +0000359*/
danielk1977bfd6cce2004-06-18 04:24:54 +0000360int sqlite3VdbeMemHandleBom(Mem *pMem){
361 int rc = SQLITE_OK;
362 u8 bom = 0;
363
364 if( pMem->n<0 || pMem->n>1 ){
365 u8 b1 = *(u8 *)pMem->z;
366 u8 b2 = *(((u8 *)pMem->z) + 1);
danielk197793d46752004-05-23 13:30:58 +0000367 if( b1==0xFE && b2==0xFF ){
danielk1977bfd6cce2004-06-18 04:24:54 +0000368 bom = SQLITE_UTF16BE;
danielk197793d46752004-05-23 13:30:58 +0000369 }
370 if( b1==0xFF && b2==0xFE ){
danielk1977bfd6cce2004-06-18 04:24:54 +0000371 bom = SQLITE_UTF16LE;
danielk197793d46752004-05-23 13:30:58 +0000372 }
373 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000374
375 if( bom ){
376 if( pMem->flags & MEM_Short ){
377 memmove(pMem->zShort, &pMem->zShort[2], NBFS-2);
378 pMem->n -= 2;
379 pMem->enc = bom;
danielk1977998b56c2004-05-06 23:37:52 +0000380 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000381 else if( pMem->flags & MEM_Dyn ){
382 void (*xDel)(void*) = pMem->xDel;
383 char *z = pMem->z;
384 pMem->z = 0;
385 pMem->xDel = 0;
386 rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
387 if( xDel ){
388 xDel(z);
389 }else{
390 sqliteFree(z);
391 }
392 }else{
393 rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom,
394 SQLITE_TRANSIENT);
395 }
danielk1977998b56c2004-05-06 23:37:52 +0000396 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000397 return rc;
danielk1977998b56c2004-05-06 23:37:52 +0000398}
399
/*
** z is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in z up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte bytes of z (or up to
** the first 0x00, whichever comes first).  In particular nByte==0 always
** yields 0.
**
** Characters are stepped over with SKIP_UTF8(), which looks only at the
** first byte of each character; a character that starts inside the limit
** is counted even if its trailing bytes lie beyond it.
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  int r = 0;
  const char *zTerm;
  if( nByte>=0 ){
    zTerm = &z[nByte];
  }else{
    /* No byte limit: use the highest possible address as the end marker so
    ** that only a 0x00 byte stops the scan.
    */
    zTerm = (const char *)(-1);
  }
  assert( z<=zTerm );
  /* Test the limit before dereferencing, so that the byte at z[nByte] is
  ** never read.
  */
  while( z<zTerm && *z!=0 ){
    SKIP_UTF8(z);
    r++;
  }
  return r;
}
422
423/*
424** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
425** return the number of bytes up to (but not including), the first pair
426** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
427** then return the number of bytes in the first nChar unicode characters
428** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
429*/
danielk1977bfd6cce2004-06-18 04:24:54 +0000430int sqlite3utf16ByteLen(const void *zIn, int nChar){
431 int c = 1;
432 char const *z = zIn;
433 int n = 0;
434 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
435 while( c && ((nChar<0) || n<nChar) ){
436 READ_UTF16BE(z, c);
437 n++;
danielk19776622cce2004-05-20 11:00:52 +0000438 }
danielk19776622cce2004-05-20 11:00:52 +0000439 }else{
danielk1977bfd6cce2004-06-18 04:24:54 +0000440 while( c && ((nChar<0) || n<nChar) ){
441 READ_UTF16LE(z, c);
442 n++;
danielk19776622cce2004-05-20 11:00:52 +0000443 }
danielk19776622cce2004-05-20 11:00:52 +0000444 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000445 return (z-(char const *)zIn)-((c==0)?2:0);
danielk1977998b56c2004-05-06 23:37:52 +0000446}
447
/*
** Match the UTF-8 string zString against the UTF-8 pattern zPattern using
** the rules of the SQL "LIKE" operator: '%' matches any sequence of zero
** or more characters, '_' matches exactly one character, and every other
** pattern byte must equal the corresponding string byte after both have
** been passed through LOWERCASE().
**
** Returns 1 if the whole string matches the whole pattern, 0 otherwise.
*/
int sqlite3utf8LikeCompare(
  const unsigned char *zPattern,
  const unsigned char *zString
){
  int cPat;     /* Current pattern byte, folded to lower case */
  int cStr;     /* Current string byte, folded to lower case */

  for(;;){
    cPat = LOWERCASE(*zPattern);
    if( cPat==0 ){
      break;
    }

    if( cPat=='_' ){
      /* Exactly one character, whatever it is. */
      if( *zString==0 ){
        return 0;
      }
      SKIP_UTF8(zString);
      zPattern++;
    }else if( cPat!='%' ){
      /* Literal byte. */
      if( cPat!=LOWERCASE(*zString) ){
        return 0;
      }
      zPattern++;
      zString++;
    }else{
      /* Absorb any run of '%' and '_' after this '%'.  Each '_' in the run
      ** still consumes one character of the string.  The loop leaves
      ** zPattern on the last wildcard of the run and cPat holding the byte
      ** that follows it.
      */
      for(;;){
        cPat = zPattern[1];
        if( cPat!='%' && cPat!='_' ){
          break;
        }
        if( cPat=='_' ){
          if( *zString==0 ){
            return 0;
          }
          SKIP_UTF8(zString);
        }
        zPattern++;
      }

      /* A trailing '%' matches whatever is left of the string. */
      if( cPat==0 ){
        return 1;
      }

      /* Try the remainder of the pattern at every position of the string
      ** whose byte equals the one that follows the wildcards.
      */
      cPat = LOWERCASE(cPat);
      for(;;){
        cStr = LOWERCASE(*zString);
        while( cStr!=0 && cStr!=cPat ){
          zString++;
          cStr = LOWERCASE(*zString);
        }
        if( cStr==0 ){
          return 0;
        }
        if( sqlite3utf8LikeCompare(&zPattern[1], zString) ){
          return 1;
        }
        SKIP_UTF8(zString);
      }
    }
  }

  /* Pattern exhausted: it is a match only if the string is exhausted too. */
  return *zString==0;
}
danielk1977bfd6cce2004-06-18 04:24:54 +0000500
#ifndef NDEBUG
/*
** This routine is called from the TCL test function "translate_selftest".
** For every code point below 0x110000 it writes the value with one of the
** WRITE_xxx macros, reads it back with the matching READ_xxx macro, and
** asserts that the value and the number of bytes consumed both agree.
**
** The UTF-8 pass is currently switched off by the constant-false "0 &&" in
** its loop condition.  The two UTF-16 passes leave out the values
** 0xD800..0xE000.
*/
void sqlite3utfSelfTest(){
  unsigned char aBuf[20];   /* Scratch space for one encoded character */
  unsigned char *pCur;      /* Read/write position within aBuf */
  int cp;                   /* Code point being checked */
  int nWritten;             /* Bytes produced by the writer */
  int decoded;              /* Value recovered by the reader */

  /* UTF-8 round trip (disabled). */
  cp = 0;
  while( 0 && cp<0x00110000 ){
    pCur = aBuf;
    WRITE_UTF8(pCur, cp);
    nWritten = pCur-aBuf;
    pCur = aBuf;
    READ_UTF8(pCur, decoded);
    assert( decoded==cp );
    assert( (pCur-aBuf)==nWritten );
    cp++;
  }

  /* UTF-16 little-endian round trip. */
  for(cp=0; cp<0x00110000; cp++){
    if( cp<0xD800 || cp>0xE000 ){
      pCur = aBuf;
      WRITE_UTF16LE(pCur, cp);
      nWritten = pCur-aBuf;
      pCur = aBuf;
      READ_UTF16LE(pCur, decoded);
      assert( decoded==cp );
      assert( (pCur-aBuf)==nWritten );
    }
  }

  /* UTF-16 big-endian round trip. */
  for(cp=0; cp<0x00110000; cp++){
    if( cp<0xD800 || cp>0xE000 ){
      pCur = aBuf;
      WRITE_UTF16BE(pCur, cp);
      nWritten = pCur-aBuf;
      pCur = aBuf;
      READ_UTF16BE(pCur, decoded);
      assert( decoded==cp );
      assert( (pCur-aBuf)==nWritten );
    }
  }
}
#endif
545
546