blob: 4bb08b5e07e885d097e42fd38b86e11c356ef235 [file] [log] [blame]
drha5d14fe2004-05-04 15:00:46 +00001/*
2** 2004 April 13
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12** This file contains routines used to translate between UTF-8,
13** UTF-16, UTF-16BE, and UTF-16LE.
14**
danielk1977d7e69642004-06-23 00:23:49 +000015** $Id: utf.c,v 1.24 2004/06/23 00:23:49 danielk1977 Exp $
drha5d14fe2004-05-04 15:00:46 +000016**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
**  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
24**
25**
** Notes on UTF-16:  (with wwww+1==uuuuu)
**
**      Word-0               Word-1          Value
**  110110ww wwzzzzyy   110111yy yyxxxxxx    000uuuuu zzzzyyyy yyxxxxxx
**  zzzzyyyy yyxxxxxx                        00000000 zzzzyyyy yyxxxxxx
drha5d14fe2004-05-04 15:00:46 +000031**
danielk1977998b56c2004-05-06 23:37:52 +000032**
drha5d14fe2004-05-04 15:00:46 +000033** BOM or Byte Order Mark:
34** 0xff 0xfe little-endian utf-16 follows
35** 0xfe 0xff big-endian utf-16 follows
danielk1977998b56c2004-05-06 23:37:52 +000036**
37**
38** Handling of malformed strings:
39**
40** SQLite accepts and processes malformed strings without an error wherever
41** possible. However this is not possible when converting between UTF-8 and
42** UTF-16.
43**
** When converting malformed UTF-8 strings to UTF-16, one instance of the
** replacement character U+FFFD is written for each byte that cannot be
** interpreted as part of a valid unicode character.
**
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD is written for each pair of bytes that
** cannot be interpreted as part of a valid unicode character.
danielk1977bfd6cce2004-06-18 04:24:54 +000051**
52** This file contains the following public routines:
53**
54** sqlite3VdbeMemTranslate() - Translate the encoding used by a Mem* string.
55** sqlite3VdbeMemHandleBom() - Handle byte-order-marks in UTF16 Mem* strings.
56** sqlite3utf16ByteLen() - Calculate byte-length of a void* UTF16 string.
57** sqlite3utf8CharLen() - Calculate char-length of a char* UTF8 string.
58** sqlite3utf8LikeCompare() - Do a LIKE match given two UTF8 char* strings.
59**
drha5d14fe2004-05-04 15:00:46 +000060*/
danielk1977998b56c2004-05-06 23:37:52 +000061#include <assert.h>
danielk1977998b56c2004-05-06 23:37:52 +000062#include "sqliteInt.h"
danielk1977bfd6cce2004-06-18 04:24:54 +000063#include "vdbeInt.h"
danielk1977998b56c2004-05-06 23:37:52 +000064
/*
** The following macro, LOWERCASE(x), takes an integer representing a
** unicode code point. The value returned is the same code point folded to
** lower case, if applicable. SQLite currently understands the upper/lower
** case relationship between the 26 characters used in the English
** language only.
**
** This means that characters with umlauts etc. will not be folded
** correctly (unless they are encoded as composite characters, which would
** doubtless cause much trouble).
**
** Code points in the range 0..90 are looked up in UpperToLower[]; every
** other value (including negative ones, which the unsigned comparison
** rejects) is returned unchanged.  The argument is expanded more than
** once, so it must not have side effects.
*/
#define LOWERCASE(x) \
  (((unsigned int)(x))<91u ? (int)(UpperToLower[(x)]) : (x))
static const unsigned char UpperToLower[91] = {
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
    122,
};
85
86/*
danielk1977bfd6cce2004-06-18 04:24:54 +000087** This table maps from the first byte of a UTF-8 character to the number
88** of trailing bytes expected. A value '255' indicates that the table key
89** is not a legal first byte for a UTF-8 character.
danielk1977d02eb1f2004-06-06 09:44:03 +000090*/
danielk1977bfd6cce2004-06-18 04:24:54 +000091static const u8 xtra_utf8_bytes[256] = {
92/* 0xxxxxxx */
930, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
940, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
950, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
960, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
970, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
980, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
990, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
danielk1977d02eb1f2004-06-06 09:44:03 +0000101
danielk1977bfd6cce2004-06-18 04:24:54 +0000102/* 10wwwwww */
103255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
104255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
105255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
106255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
danielk1977ad7dd422004-06-06 12:41:49 +0000107
danielk1977bfd6cce2004-06-18 04:24:54 +0000108/* 110yyyyy */
1091, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1101, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111
112/* 1110zzzz */
1132, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
115/* 11110yyy */
1163, 3, 3, 3, 3, 3, 3, 3, 255, 255, 255, 255, 255, 255, 255, 255,
117};
118
/*
** This table maps from the number of trailing bytes in a UTF-8 character
** to an integer constant that is effectively calculated for each character
** read by a naive implementation of a UTF-8 character reader. The code
** in the READ_UTF8 macro explains things best.
**
** Entry N is the sum of the marker bits (110xxxxx / 1110xxxx / 11110xxx
** on the lead byte, 10xxxxxx on each of the N trailing bytes) after they
** have been shifted into position, so subtracting it from the raw
** accumulated value leaves only the payload bits.
*/
static const int xtra_utf8_bits[4] =  {
  0,
  (0xC0 << 6) + 0x80,                                  /* 12416    */
  (0xE0 << 12) + (0x80 << 6) + 0x80,                   /* 925824   */
  (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80     /* 63447168 */
};
131
/*
** READ_UTF8(zIn, c): read one UTF-8 encoded character starting at zIn,
** store its code point in the int lvalue c and advance zIn past it.
**
** zIn must be a pointer to unsigned char.  The first byte selects, via
** xtra_utf8_bytes[], how many trailing bytes follow.  If the first byte
** is not a legal lead byte (table value 255), c is set to the replacement
** character U+FFFD and only that one byte is consumed.  Trailing bytes
** are consumed without being validated or bounds-checked, so the caller
** must ensure that the whole sequence is readable.
**
** The macro is wrapped in do{}while(0) so that it behaves as a single
** statement, e.g. as the body of an unbraced if/else.
*/
#define READ_UTF8(zIn, c) do{                                   \
  int nXtra_;                                                   \
  (c) = *(zIn)++;                                               \
  nXtra_ = xtra_utf8_bytes[(c)];                                \
  switch( nXtra_ ){                                             \
    case 255: (c) = (int)0xFFFD; break;                         \
    case 3: (c) = ((c)<<6) + *(zIn)++;  /* fall through */      \
    case 2: (c) = ((c)<<6) + *(zIn)++;  /* fall through */      \
    case 1: (c) = ((c)<<6) + *(zIn)++;                          \
            /* strip the accumulated 11xxxxxx/10xxxxxx markers */ \
            (c) -= xtra_utf8_bits[nXtra_];                      \
            break;                                              \
    default: break;   /* single byte character: c is final */   \
  }                                                             \
}while(0)
144
/*
** SKIP_UTF8(zIn): advance zIn past one UTF-8 encoded character.
**
** zIn may point to char or unsigned char.  The lead byte is always
** skipped.  After that, at most as many bytes as xtra_utf8_bytes[] says
** the lead byte requires are skipped, and only while they really are
** continuation bytes (10xxxxxx).  As a result:
**
**   *  A byte that is not a legal lead byte (table value 255) counts as
**      a one-byte character instead of advancing the pointer 256 bytes.
**
**   *  A truncated multi-byte sequence never steps over the 0x00 string
**      terminator, because 0x00 is not a continuation byte.
**
** For well-formed UTF-8 the pointer advances by exactly 1 + the number
** of trailing bytes given by the table.
*/
#define SKIP_UTF8(zIn) do{                                           \
  int nSkip_ = xtra_utf8_bytes[*(const unsigned char *)(zIn)];       \
  (zIn)++;                                                           \
  if( nSkip_==255 ) nSkip_ = 0;                                      \
  while( nSkip_>0 && (*(const unsigned char *)(zIn)&0xC0)==0x80 ){   \
    (zIn)++;                                                         \
    nSkip_--;                                                        \
  }                                                                  \
}while(0)
148
/*
** WRITE_UTF8(zOut, c): append the UTF-8 encoding of code point c to the
** byte buffer at zOut and advance zOut past the bytes written (1 to 4).
**
** c is evaluated exactly once.  Values of 0x10000 and above are always
** written as a 4-byte sequence; only the low 21 bits are encoded.
*/
#define WRITE_UTF8(zOut, c) do{                                   \
  int cWr_ = (c);                                                 \
  if( cWr_<0x00080 ){                                             \
    *(zOut)++ = (unsigned char)(cWr_&0xFF);                       \
  }else if( cWr_<0x00800 ){                                       \
    *(zOut)++ = (unsigned char)(0xC0 + ((cWr_>>6)&0x1F));         \
    *(zOut)++ = (unsigned char)(0x80 + (cWr_ & 0x3F));            \
  }else if( cWr_<0x10000 ){                                       \
    *(zOut)++ = (unsigned char)(0xE0 + ((cWr_>>12)&0x0F));        \
    *(zOut)++ = (unsigned char)(0x80 + ((cWr_>>6) & 0x3F));       \
    *(zOut)++ = (unsigned char)(0x80 + (cWr_ & 0x3F));            \
  }else{                                                          \
    *(zOut)++ = (unsigned char)(0xF0 + ((cWr_>>18) & 0x07));      \
    *(zOut)++ = (unsigned char)(0x80 + ((cWr_>>12) & 0x3F));      \
    *(zOut)++ = (unsigned char)(0x80 + ((cWr_>>6) & 0x3F));       \
    *(zOut)++ = (unsigned char)(0x80 + (cWr_ & 0x3F));            \
  }                                                               \
}while(0)
168
/*
** WRITE_UTF16LE(zOut, c): append the little-endian UTF-16 encoding of
** code point c to the byte buffer at zOut and advance zOut past the bytes
** written.  Code points up to 0xFFFF are written as one 16-bit word
** (2 bytes); larger values are written as a surrogate pair (4 bytes),
** each word stored low byte first.
**
** c is evaluated exactly once.
*/
#define WRITE_UTF16LE(zOut, c) do{                                          \
  int cWr_ = (c);                                                           \
  if( cWr_<=0xFFFF ){                                                       \
    *(zOut)++ = (unsigned char)(cWr_&0x00FF);                               \
    *(zOut)++ = (unsigned char)((cWr_>>8)&0x00FF);                          \
  }else{                                                                    \
    /* High surrogate 110110ww wwzzzzyy, low byte first */                  \
    *(zOut)++ = (unsigned char)(((cWr_>>10)&0x003F)                         \
                              + (((cWr_-0x10000)>>10)&0x00C0));             \
    *(zOut)++ = (unsigned char)(0x00D8 + (((cWr_-0x10000)>>18)&0x03));      \
    /* Low surrogate 110111yy yyxxxxxx, low byte first */                   \
    *(zOut)++ = (unsigned char)(cWr_&0x00FF);                               \
    *(zOut)++ = (unsigned char)(0x00DC + ((cWr_>>8)&0x03));                 \
  }                                                                         \
}while(0)
180
/*
** WRITE_UTF16BE(zOut, c): append the big-endian UTF-16 encoding of code
** point c to the byte buffer at zOut and advance zOut past the bytes
** written.  Code points up to 0xFFFF are written as one 16-bit word
** (2 bytes); larger values are written as a surrogate pair (4 bytes),
** each word stored high byte first.
**
** c is evaluated exactly once.
*/
#define WRITE_UTF16BE(zOut, c) do{                                          \
  int cWr_ = (c);                                                           \
  if( cWr_<=0xFFFF ){                                                       \
    *(zOut)++ = (unsigned char)((cWr_>>8)&0x00FF);                          \
    *(zOut)++ = (unsigned char)(cWr_&0x00FF);                               \
  }else{                                                                    \
    /* High surrogate 110110ww wwzzzzyy, high byte first */                 \
    *(zOut)++ = (unsigned char)(0x00D8 + (((cWr_-0x10000)>>18)&0x03));      \
    *(zOut)++ = (unsigned char)(((cWr_>>10)&0x003F)                         \
                              + (((cWr_-0x10000)>>10)&0x00C0));             \
    /* Low surrogate 110111yy yyxxxxxx, high byte first */                  \
    *(zOut)++ = (unsigned char)(0x00DC + ((cWr_>>8)&0x03));                 \
    *(zOut)++ = (unsigned char)(cWr_&0x00FF);                               \
  }                                                                         \
}while(0)
192
/*
** READ_UTF16LE(zIn, c): read one little-endian UTF-16 character starting
** at zIn, store its code point in the int lvalue c and advance zIn past
** it (2 bytes, or 4 bytes for a surrogate pair).
**
** Bytes are always fetched as unsigned values, so zIn may be a pointer to
** char or to unsigned char.  A first word in the surrogate range
** 0xD800..0xDFFF causes a second word to be read and combined with it;
** 0xE000 is an ordinary character and is returned as is.  The second
** word is not validated and no bounds check is made.
*/
#define READ_UTF16LE(zIn, c) do{                                            \
  const unsigned char *zRd_ = (const unsigned char *)(zIn);                 \
  (c) = zRd_[0] + (zRd_[1]<<8);                                             \
  (zIn) += 2;                                                               \
  if( (c)>=0xD800 && (c)<0xE000 ){                                          \
    int c2_ = zRd_[2] + (zRd_[3]<<8);                                       \
    (zIn) += 2;                                                             \
    (c) = (c2_&0x03FF) + (((c)&0x003F)<<10) + ((((c)&0x03C0)+0x0040)<<10);  \
  }                                                                         \
}while(0)
202
/*
** READ_UTF16BE(zIn, c): read one big-endian UTF-16 character starting at
** zIn, store its code point in the int lvalue c and advance zIn past it
** (2 bytes, or 4 bytes for a surrogate pair).
**
** Bytes are always fetched as unsigned values, so zIn may be a pointer to
** char or to unsigned char.  A first word in the surrogate range
** 0xD800..0xDFFF causes a second word to be read and combined with it;
** 0xE000 is an ordinary character and is returned as is.  The second
** word is not validated and no bounds check is made.
*/
#define READ_UTF16BE(zIn, c) do{                                            \
  const unsigned char *zRd_ = (const unsigned char *)(zIn);                 \
  (c) = (zRd_[0]<<8) + zRd_[1];                                             \
  (zIn) += 2;                                                               \
  if( (c)>=0xD800 && (c)<0xE000 ){                                          \
    int c2_ = (zRd_[2]<<8) + zRd_[3];                                       \
    (zIn) += 2;                                                             \
    (c) = (c2_&0x03FF) + (((c)&0x003F)<<10) + ((((c)&0x03C0)+0x0040)<<10);  \
  }                                                                         \
}while(0)
212
213/*
214** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
215** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
216*/
217/* #define TRANSLATE_TRACE 1 */
218
219/*
220** This routine transforms the internal text encoding used by pMem to
221** desiredEnc. It is an error if the string is already of the desired
222** encoding, or if *pMem does not contain a string value.
223*/
224int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
225 unsigned char zShort[NBFS]; /* Temporary short output buffer */
226 int len; /* Maximum length of output string in bytes */
227 unsigned char *zOut; /* Output buffer */
228 unsigned char *zIn; /* Input iterator */
229 unsigned char *zTerm; /* End of input */
230 unsigned char *z; /* Output iterator */
231 int c;
232
233 assert( pMem->flags&MEM_Str );
234 assert( pMem->enc!=desiredEnc );
235 assert( pMem->enc!=0 );
236 assert( pMem->n>=0 );
237
238#ifdef TRANSLATE_TRACE
239 {
240 char zBuf[100];
241 sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
242 fprintf(stderr, "INPUT: %s\n", zBuf);
danielk1977ad7dd422004-06-06 12:41:49 +0000243 }
244#endif
245
danielk1977bfd6cce2004-06-18 04:24:54 +0000246 /* If the translation is between UTF-16 little and big endian, then
247 ** all that is required is to swap the byte order. This case is handled
248 ** differently from the others.
danielk1977998b56c2004-05-06 23:37:52 +0000249 */
danielk1977bfd6cce2004-06-18 04:24:54 +0000250 if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
251 u8 temp;
252 sqlite3VdbeMemMakeWriteable(pMem);
253 zIn = pMem->z;
254 zTerm = &zIn[pMem->n];
255 while( zIn<zTerm ){
256 temp = *zIn;
257 *zIn = *(zIn+1);
258 zIn++;
259 *zIn++ = temp;
260 }
261 pMem->enc = desiredEnc;
262 goto translate_out;
263 }
264
danielk1977d7e69642004-06-23 00:23:49 +0000265 /* Set len to the maximum number of bytes required in the output buffer. */
266 if( desiredEnc==SQLITE_UTF8 ){
267 /* When converting from UTF-16, the maximum growth results from
268 ** translating a 2-byte character to a 3-byte UTF-8 character (i.e.
269 ** code-point 0xFFFC). A single byte is required for the output string
270 ** nul-terminator.
271 */
272 len = (pMem->n/2) * 3 + 1;
273 }else{
274 /* When converting from UTF-8 to UTF-16 the maximum growth is caused
275 ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16
276 ** character. Two bytes are required in the output buffer for the
277 ** nul-terminator.
278 */
279 len = pMem->n * 2 + 2;
280 }
281
danielk1977bfd6cce2004-06-18 04:24:54 +0000282 /* Set zIn to point at the start of the input buffer and zTerm to point 1
283 ** byte past the end.
284 **
285 ** Variable zOut is set to point at the output buffer. This may be space
286 ** obtained from malloc(), or Mem.zShort, if it large enough and not in
287 ** use, or the zShort array on the stack (see above).
288 */
289 zIn = pMem->z;
290 zTerm = &zIn[pMem->n];
danielk1977bfd6cce2004-06-18 04:24:54 +0000291 if( len>NBFS ){
292 zOut = sqliteMallocRaw(len);
293 if( !zOut ) return SQLITE_NOMEM;
294 }else{
295 if( pMem->z==pMem->zShort ){
296 zOut = zShort;
297 }else{
298 zOut = pMem->zShort;
299 }
300 }
301 z = zOut;
302
303 if( pMem->enc==SQLITE_UTF8 ){
304 if( desiredEnc==SQLITE_UTF16LE ){
305 /* UTF-8 -> UTF-16 Little-endian */
306 while( zIn<zTerm ){
307 READ_UTF8(zIn, c);
308 WRITE_UTF16LE(z, c);
309 }
310 WRITE_UTF16LE(z, 0);
311 pMem->n = (z-zOut)-2;
312 }else if( desiredEnc==SQLITE_UTF16BE ){
313 /* UTF-8 -> UTF-16 Big-endian */
314 while( zIn<zTerm ){
315 READ_UTF8(zIn, c);
316 WRITE_UTF16BE(z, c);
317 }
318 WRITE_UTF16BE(z, 0);
319 pMem->n = (z-zOut)-2;
320 }
321 }else{
322 assert( desiredEnc==SQLITE_UTF8 );
323 if( pMem->enc==SQLITE_UTF16LE ){
324 /* UTF-16 Little-endian -> UTF-8 */
325 while( zIn<zTerm ){
326 READ_UTF16LE(zIn, c);
327 WRITE_UTF8(z, c);
328 }
329 WRITE_UTF8(z, 0);
330 pMem->n = (z-zOut)-1;
331 }else{
332 /* UTF-16 Little-endian -> UTF-8 */
333 while( zIn<zTerm ){
334 READ_UTF16BE(zIn, c);
335 WRITE_UTF8(z, c);
336 }
337 WRITE_UTF8(z, 0);
338 pMem->n = (z-zOut)-1;
danielk1977998b56c2004-05-06 23:37:52 +0000339 }
340 }
danielk1977d7e69642004-06-23 00:23:49 +0000341 assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );
danielk1977998b56c2004-05-06 23:37:52 +0000342
danielk1977bfd6cce2004-06-18 04:24:54 +0000343 sqlite3VdbeMemRelease(pMem);
344 pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
345 pMem->enc = desiredEnc;
346 if( (char *)zOut==pMem->zShort ){
347 pMem->flags |= (MEM_Term|MEM_Short);
348 }else if( zOut==zShort ){
349 memcpy(pMem->zShort, zOut, len);
350 zOut = pMem->zShort;
351 pMem->flags |= (MEM_Term|MEM_Short);
352 }else{
353 pMem->flags |= (MEM_Term|MEM_Dyn);
354 }
355 pMem->z = zOut;
356
357translate_out:
358#ifdef TRANSLATE_TRACE
359 {
360 char zBuf[100];
361 sqlite3VdbeMemPrettyPrint(pMem, zBuf, 100);
362 fprintf(stderr, "OUTPUT: %s\n", zBuf);
363 }
364#endif
365 return SQLITE_OK;
danielk1977998b56c2004-05-06 23:37:52 +0000366}
367
danielk197793d46752004-05-23 13:30:58 +0000368/*
danielk1977bfd6cce2004-06-18 04:24:54 +0000369** This routine checks for a byte-order mark at the beginning of the
370** UTF-16 string stored in *pMem. If one is present, it is removed and
371** the encoding of the Mem adjusted. This routine does not do any
372** byte-swapping, it just sets Mem.enc appropriately.
373**
374** The allocation (static, dynamic etc.) and encoding of the Mem may be
375** changed by this function.
danielk197793d46752004-05-23 13:30:58 +0000376*/
danielk1977bfd6cce2004-06-18 04:24:54 +0000377int sqlite3VdbeMemHandleBom(Mem *pMem){
378 int rc = SQLITE_OK;
379 u8 bom = 0;
380
381 if( pMem->n<0 || pMem->n>1 ){
382 u8 b1 = *(u8 *)pMem->z;
383 u8 b2 = *(((u8 *)pMem->z) + 1);
danielk197793d46752004-05-23 13:30:58 +0000384 if( b1==0xFE && b2==0xFF ){
danielk1977bfd6cce2004-06-18 04:24:54 +0000385 bom = SQLITE_UTF16BE;
danielk197793d46752004-05-23 13:30:58 +0000386 }
387 if( b1==0xFF && b2==0xFE ){
danielk1977bfd6cce2004-06-18 04:24:54 +0000388 bom = SQLITE_UTF16LE;
danielk197793d46752004-05-23 13:30:58 +0000389 }
390 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000391
392 if( bom ){
393 if( pMem->flags & MEM_Short ){
394 memmove(pMem->zShort, &pMem->zShort[2], NBFS-2);
395 pMem->n -= 2;
396 pMem->enc = bom;
danielk1977998b56c2004-05-06 23:37:52 +0000397 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000398 else if( pMem->flags & MEM_Dyn ){
399 void (*xDel)(void*) = pMem->xDel;
400 char *z = pMem->z;
401 pMem->z = 0;
402 pMem->xDel = 0;
403 rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
404 if( xDel ){
405 xDel(z);
406 }else{
407 sqliteFree(z);
408 }
409 }else{
410 rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom,
411 SQLITE_TRANSIENT);
412 }
danielk1977998b56c2004-05-06 23:37:52 +0000413 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000414 return rc;
danielk1977998b56c2004-05-06 23:37:52 +0000415}
416
/*
** z is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in z up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte of z (or up to
** the first 0x00, whichever comes first).
**
** In particular nByte==0 yields 0 without reading from z at all.  The
** byte limit is checked before each byte is examined.
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  int r = 0;
  const char *zTerm = 0;     /* End of the counted region when nByte>=0 */

  if( nByte>=0 ){
    zTerm = &z[nByte];
  }
  while( (nByte<0 || z<zTerm) && *z!=0 ){
    SKIP_UTF8(z);
    r++;
  }
  return r;
}
439
440/*
441** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
442** return the number of bytes up to (but not including), the first pair
443** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
444** then return the number of bytes in the first nChar unicode characters
445** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
446*/
danielk1977bfd6cce2004-06-18 04:24:54 +0000447int sqlite3utf16ByteLen(const void *zIn, int nChar){
448 int c = 1;
449 char const *z = zIn;
450 int n = 0;
451 if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
452 while( c && ((nChar<0) || n<nChar) ){
453 READ_UTF16BE(z, c);
454 n++;
danielk19776622cce2004-05-20 11:00:52 +0000455 }
danielk19776622cce2004-05-20 11:00:52 +0000456 }else{
danielk1977bfd6cce2004-06-18 04:24:54 +0000457 while( c && ((nChar<0) || n<nChar) ){
458 READ_UTF16LE(z, c);
459 n++;
danielk19776622cce2004-05-20 11:00:52 +0000460 }
danielk19776622cce2004-05-20 11:00:52 +0000461 }
danielk1977bfd6cce2004-06-18 04:24:54 +0000462 return (z-(char const *)zIn)-((c==0)?2:0);
danielk1977998b56c2004-05-06 23:37:52 +0000463}
464
/*
** Compare two UTF-8 strings for equality using the "LIKE" operator of
** SQL.  The '%' character matches any sequence of 0 or more
** characters and '_' matches any single character.  Case is
** not significant.
**
** Returns 1 if zString matches zPattern and 0 otherwise.  Both arguments
** must be 0x00-terminated.  Literal characters are compared one byte at a
** time after folding with LOWERCASE(); '_' consumes one whole UTF-8
** character of zString.
*/
int sqlite3utf8LikeCompare(
  const unsigned char *zPattern,
  const unsigned char *zString
){
  int c;     /* Current pattern byte, folded to lower case */
  int c2;    /* Current string byte, folded to lower case */

  while( (c = LOWERCASE(*zPattern))!=0 ){
    switch( c ){
      case '%': {
        /* Collapse a run of '%' and '_' that follows the '%'.  Every '_'
        ** in the run still has to consume one character of zString.
        */
        while( (c=zPattern[1]) == '%' || c == '_' ){
          if( c=='_' ){
            if( *zString==0 ) return 0;
            SKIP_UTF8(zString);
          }
          zPattern++;
        }
        /* A trailing '%' matches whatever is left of zString. */
        if( c==0 ) return 1;
        c = LOWERCASE(c);
        /* c is the first literal byte after the wildcard.  Try to match
        ** the rest of the pattern at every position of zString where that
        ** byte occurs.
        */
        while( (c2=LOWERCASE(*zString))!=0 ){
          while( c2 != 0 && c2 != c ){
            zString++;
            c2 = LOWERCASE(*zString);
          }
          if( c2==0 ) return 0;
          if( sqlite3utf8LikeCompare(&zPattern[1],zString) ) return 1;
          SKIP_UTF8(zString);
        }
        return 0;
      }
      case '_': {
        if( *zString==0 ) return 0;
        SKIP_UTF8(zString);
        zPattern++;
        break;
      }
      default: {
        if( c != LOWERCASE(*zString) ) return 0;
        zPattern++;
        zString++;
        break;
      }
    }
  }
  /* Pattern exhausted: it is a match only if zString is exhausted too. */
  return *zString==0;
}
danielk1977bfd6cce2004-06-18 04:24:54 +0000517
#if defined(SQLITE_TEST)
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.
**
** Every code point below 0x110000 is written with a WRITE_xxx macro and
** read back with the matching READ_xxx macro; both the decoded value and
** the number of bytes consumed must agree with what was written.
*/
void sqlite3utfSelfTest(void){
  int i;
  unsigned char zBuf[20];
  unsigned char *z;
  int n;
  int c;

  /* The UTF-8 round trip is currently switched off by the constant 0 in
  ** the loop condition. */
  for(i=0; 0 && i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF8(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){
    /* Values in this range are not round-tripped through UTF-16. */
    if( i>=0xD800 && i<=0xE000 ) continue;
    z = zBuf;
    WRITE_UTF16LE(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF16LE(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){
    /* Values in this range are not round-tripped through UTF-16. */
    if( i>=0xD800 && i<=0xE000 ) continue;
    z = zBuf;
    WRITE_UTF16BE(z, i);
    n = z-zBuf;
    z = zBuf;
    READ_UTF16BE(z, c);
    assert( c==i );
    assert( (z-zBuf)==n );
  }
}
#endif