blob: 65dd05e4a3981e5dba864cda0e2289e7326d9eba [file] [log] [blame]
drha5d14fe2004-05-04 15:00:46 +00001/*
2** 2004 April 13
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12** This file contains routines used to translate between UTF-8,
13** UTF-16, UTF-16BE, and UTF-16LE.
14**
danielk1977193c72f2004-06-02 00:29:24 +000015** $Id: utf.c,v 1.16 2004/06/02 00:29:24 danielk1977 Exp $
drha5d14fe2004-05-04 15:00:46 +000016**
17** Notes on UTF-8:
18**
**  Byte-0    Byte-1    Byte-2    Byte-3     Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
**  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
24**
25**
26** Notes on UTF-16: (with wwww+1==uuuuu)
27**
**      Word-0               Word-1          Value
**  110110ww wwzzzzyy   110111yy yyxxxxxx    000uuuuu zzzzyyyy yyxxxxxx
**  zzzzyyyy yyxxxxxx                        00000000 zzzzyyyy yyxxxxxx
drha5d14fe2004-05-04 15:00:46 +000031**
danielk1977998b56c2004-05-06 23:37:52 +000032**
drha5d14fe2004-05-04 15:00:46 +000033** BOM or Byte Order Mark:
34** 0xff 0xfe little-endian utf-16 follows
35** 0xfe 0xff big-endian utf-16 follows
danielk1977998b56c2004-05-06 23:37:52 +000036**
37**
38** Handling of malformed strings:
39**
40** SQLite accepts and processes malformed strings without an error wherever
41** possible. However this is not possible when converting between UTF-8 and
42** UTF-16.
43**
** When converting malformed UTF-8 strings to UTF-16, one instance of the
** replacement character U+FFFD is emitted for each byte that cannot be
** interpreted as part of a valid unicode character.
47**
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD is emitted for each pair of bytes that
** cannot be interpreted as part of a valid unicode character.
drha5d14fe2004-05-04 15:00:46 +000051*/
danielk1977998b56c2004-05-06 23:37:52 +000052#include <assert.h>
danielk1977998b56c2004-05-06 23:37:52 +000053#include "sqliteInt.h"
54
/*
** A cursor over a raw UTF-8 or UTF-16 byte buffer. The read and write
** helpers in this file use it for both input and output strings.
*/
typedef struct UtfString UtfString;
struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes. Some callers in
                        ** this file set it negative when the length of an
                        ** input string is not known. */
  int c;                /* Number of pZ bytes already read or written */
};
61
/*
** These two macros are used to interpret the first two bytes of the
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
**
** pZ is evaluated twice, so it must not have side effects.
*/
#define BE16(pZ) (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ) (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))

/*
** READ_16 interprets the first two bytes of the unsigned char array pZ
** as a 16-bit unsigned int. If big_endian is non-zero the interpretation
** is big-endian, otherwise little-endian.
**
** The big_endian argument is parenthesized so that an arbitrary expression
** (for example another conditional) can be passed safely.
*/
#define READ_16(pZ,big_endian) ((big_endian)?BE16(pZ):LE16(pZ))
76
77/*
78** Read the BOM from the start of *pStr, if one is present. Return zero
79** for little-endian, non-zero for big-endian. If no BOM is present, return
danielk1977b1bc9532004-05-22 03:05:33 +000080** the value of the parameter "big_endian".
danielk1977998b56c2004-05-06 23:37:52 +000081**
82** Return values:
83** 1 -> big-endian string
84** 0 -> little-endian string
85*/
danielk1977b1bc9532004-05-22 03:05:33 +000086static int readUtf16Bom(UtfString *pStr, int big_endian){
danielk1977998b56c2004-05-06 23:37:52 +000087 /* The BOM must be the first thing read from the string */
88 assert( pStr->c==0 );
89
90 /* If the string data consists of 1 byte or less, the BOM will make no
91 ** difference anyway. In this case just fall through to the default case
92 ** and return the native byte-order for this machine.
93 **
94 ** Otherwise, check the first 2 bytes of the string to see if a BOM is
95 ** present.
96 */
97 if( pStr->n>1 ){
danielk1977193c72f2004-06-02 00:29:24 +000098 u8 bom = sqlite3UtfReadBom(pStr->pZ, 2);
99 if( bom ){
100 pStr->c += 2;
101 return (bom==TEXT_Utf16le)?0:1;
danielk1977998b56c2004-05-06 23:37:52 +0000102 }
103 }
104
danielk1977b1bc9532004-05-22 03:05:33 +0000105 return big_endian;
danielk1977998b56c2004-05-06 23:37:52 +0000106}
107
danielk197793d46752004-05-23 13:30:58 +0000108/*
109** zData is a UTF-16 encoded string, nData bytes in length. This routine
110** checks if there is a byte-order mark at the start of zData. If no
111** byte order mark is found 0 is returned. Otherwise TEXT_Utf16be or
112** TEXT_Utf16le is returned, depending on whether The BOM indicates that
113** the text is big-endian or little-endian.
114*/
115u8 sqlite3UtfReadBom(const void *zData, int nData){
116 if( nData<0 || nData>1 ){
117 u8 b1 = *(u8 *)zData;
118 u8 b2 = *(((u8 *)zData) + 1);
119 if( b1==0xFE && b2==0xFF ){
120 return TEXT_Utf16be;
121 }
122 if( b1==0xFF && b2==0xFE ){
123 return TEXT_Utf16le;
124 }
125 }
126 return 0;
127}
128
danielk1977998b56c2004-05-06 23:37:52 +0000129
130/*
131** Read a single unicode character from the UTF-8 encoded string *pStr. The
132** value returned is a unicode scalar value. In the case of malformed
133** strings, the unicode replacement character U+FFFD may be returned.
134*/
135static u32 readUtf8(UtfString *pStr){
136 struct Utf8TblRow {
137 u8 b1_mask;
138 u8 b1_masked_val;
139 u8 b1_value_mask;
140 int trailing_bytes;
141 };
142 static const struct Utf8TblRow utf8tbl[] = {
143 { 0x80, 0x00, 0x7F, 0 },
144 { 0xE0, 0xC0, 0x1F, 1 },
145 { 0xF0, 0xE0, 0x0F, 2 },
146 { 0xF8, 0xF0, 0x0E, 3 },
147 { 0, 0, 0, 0}
148 };
149
150 u8 b1; /* First byte of the potentially multi-byte utf-8 character */
151 u32 ret = 0; /* Return value */
152 int ii;
153 struct Utf8TblRow const *pRow;
154
155 pRow = &(utf8tbl[0]);
156
157 b1 = pStr->pZ[pStr->c];
158 pStr->c++;
159 while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){
160 pRow++;
161 }
162 if( !pRow->b1_mask ){
163 return 0xFFFD;
164 }
165
166 ret = (u32)(b1&pRow->b1_value_mask);
167 for( ii=0; ii<pRow->trailing_bytes; ii++ ){
168 u8 b = pStr->pZ[pStr->c+ii];
169 if( (b&0xC0)!=0x80 ){
170 return 0xFFFD;
171 }
172 ret = (ret<<6) + (u32)(b&0x3F);
173 }
174
175 pStr->c += pRow->trailing_bytes;
176 return ret;
177}
178
179/*
180** Write the unicode character 'code' to the string pStr using UTF-8
181** encoding. SQLITE_NOMEM may be returned if sqlite3Malloc() fails.
182*/
183static int writeUtf8(UtfString *pStr, u32 code){
184 struct Utf8WriteTblRow {
185 u32 max_code;
186 int trailing_bytes;
187 u8 b1_and_mask;
188 u8 b1_or_mask;
189 };
190 static const struct Utf8WriteTblRow utf8tbl[] = {
191 {0x0000007F, 0, 0x7F, 0x00},
192 {0x000007FF, 1, 0xDF, 0xC0},
193 {0x0000FFFF, 2, 0xEF, 0xE0},
194 {0x0010FFFF, 3, 0xF7, 0xF0},
195 {0x00000000, 0, 0x00, 0x00}
196 };
danielk19776622cce2004-05-20 11:00:52 +0000197 const struct Utf8WriteTblRow *pRow = &utf8tbl[0];
danielk1977998b56c2004-05-06 23:37:52 +0000198
danielk1977295ba552004-05-19 10:34:51 +0000199 while( code>pRow->max_code ){
danielk1977998b56c2004-05-06 23:37:52 +0000200 assert( pRow->max_code );
201 pRow++;
202 }
203
204 /* Ensure there is enough room left in the output buffer to write
205 ** this UTF-8 character.
206 */
207 assert( (pStr->n-pStr->c)>=(pRow->trailing_bytes+1) );
208
209 /* Write the UTF-8 encoded character to pStr. All cases below are
210 ** intentionally fall-through.
211 */
212 switch( pRow->trailing_bytes ){
213 case 3:
214 pStr->pZ[pStr->c+3] = (((u8)code)&0x3F)|0x80;
215 code = code>>6;
216 case 2:
217 pStr->pZ[pStr->c+2] = (((u8)code)&0x3F)|0x80;
218 code = code>>6;
219 case 1:
220 pStr->pZ[pStr->c+1] = (((u8)code)&0x3F)|0x80;
221 code = code>>6;
222 case 0:
223 pStr->pZ[pStr->c] = (((u8)code)&(pRow->b1_and_mask))|(pRow->b1_or_mask);
224 }
225 pStr->c += (pRow->trailing_bytes + 1);
226
227 return 0;
228}
229
230/*
231** Read a single unicode character from the UTF-16 encoded string *pStr. The
232** value returned is a unicode scalar value. In the case of malformed
233** strings, the unicode replacement character U+FFFD may be returned.
234**
235** If big_endian is true, the string is assumed to be UTF-16BE encoded.
236** Otherwise, it is UTF-16LE encoded.
237*/
238static u32 readUtf16(UtfString *pStr, int big_endian){
239 u32 code_point; /* the first code-point in the character */
240
241 /* If there is only one byte of data left in the string, return the
242 ** replacement character.
243 */
244 if( (pStr->n-pStr->c)==1 ){
245 pStr->c++;
246 return (int)0xFFFD;
247 }
248
249 code_point = READ_16(&(pStr->pZ[pStr->c]), big_endian);
250 pStr->c += 2;
251
252 /* If this is a non-surrogate code-point, just cast it to an int and
253 ** return the code-point value.
254 */
255 if( code_point<0xD800 || code_point>0xE000 ){
256 return code_point;
257 }
258
259 /* If this is a trailing surrogate code-point, then the string is
260 ** malformed; return the replacement character.
261 */
262 if( code_point>0xDBFF ){
263 return 0xFFFD;
264 }
265
266 /* The code-point just read is a leading surrogate code-point. If their
267 ** is not enough data left or the next code-point is not a trailing
268 ** surrogate, return the replacement character.
269 */
270 if( (pStr->n-pStr->c)>1 ){
271 u32 code_point2 = READ_16(&pStr->pZ[pStr->c], big_endian);
272 if( code_point2<0xDC00 || code_point>0xDFFF ){
273 return 0xFFFD;
274 }
275 pStr->c += 2;
276
277 return (
278 (((code_point&0x03C0)+0x0040)<<16) + /* uuuuu */
279 ((code_point&0x003F)<<10) + /* xxxxxx */
280 (code_point2&0x03FF) /* yy yyyyyyyy */
281 );
282
283 }else{
284 return (int)0xFFFD;
285 }
286
287 /* not reached */
288}
289
290static int writeUtf16(UtfString *pStr, int code, int big_endian){
291 int bytes;
292 unsigned char *hi_byte;
293 unsigned char *lo_byte;
294
295 bytes = (code>0x0000FFFF?4:2);
296
297 /* Ensure there is enough room left in the output buffer to write
298 ** this UTF-8 character.
299 */
300 assert( (pStr->n-pStr->c)>=bytes );
301
302 /* Initialise hi_byte and lo_byte to point at the locations into which
303 ** the MSB and LSB of the (first) 16-bit unicode code-point written for
304 ** this character.
305 */
306 hi_byte = (big_endian?&pStr->pZ[pStr->c]:&pStr->pZ[pStr->c+1]);
307 lo_byte = (big_endian?&pStr->pZ[pStr->c+1]:&pStr->pZ[pStr->c]);
308
309 if( bytes==2 ){
310 *hi_byte = (u8)((code&0x0000FF00)>>8);
311 *lo_byte = (u8)(code&0x000000FF);
312 }else{
313 u32 wrd;
314 wrd = ((((code&0x001F0000)-0x00010000)+(code&0x0000FC00))>>10)|0x0000D800;
315 *hi_byte = (u8)((wrd&0x0000FF00)>>8);
316 *lo_byte = (u8)(wrd&0x000000FF);
317
318 wrd = (code&0x000003FF)|0x0000DC00;
319 *(hi_byte+2) = (u8)((wrd&0x0000FF00)>>8);
320 *(lo_byte+2) = (u8)(wrd&0x000000FF);
321 }
322
323 pStr->c += bytes;
324
325 return 0;
326}
327
328/*
danielk19776622cce2004-05-20 11:00:52 +0000329** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
330** return the number of unicode characters in pZ up to (but not including)
331** the first 0x00 byte. If nByte is not less than zero, return the
332** number of unicode characters in the first nByte of pZ (or up to
333** the first 0x00, whichever comes first).
danielk1977998b56c2004-05-06 23:37:52 +0000334*/
danielk19776622cce2004-05-20 11:00:52 +0000335int sqlite3utf8CharLen(const char *pZ, int nByte){
336 UtfString str;
337 int ret = 0;
338 u32 code = 1;
339
340 str.pZ = (char *)pZ;
341 str.n = nByte;
342 str.c = 0;
343
344 while( (nByte<0 || str.c<str.n) && code!=0 ){
345 code = readUtf8(&str);
346 ret++;
danielk1977998b56c2004-05-06 23:37:52 +0000347 }
danielk19776622cce2004-05-20 11:00:52 +0000348 if( code==0 ) ret--;
349
350 return ret;
351}
352
/*
** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
** return the number of bytes up to (but not including), the first pair
** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
** then return the number of bytes in the first nChar unicode characters
** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
**
** In the nChar<0 case the string is scanned two bytes at a time starting
** at pZ, so only a 0x00 pair that begins at an even offset ends the scan.
*/
int sqlite3utf16ByteLen(const void *pZ, int nChar){
  if( nChar<0 ){
    /* pC1 and pC2 track the first and second byte of the current 2-byte
    ** unit; stop when both are zero.
    */
    const unsigned char *pC1 = (unsigned char *)pZ;
    const unsigned char *pC2 = (unsigned char *)pZ+1;
    while( *pC1 || *pC2 ){
      pC1 += 2;
      pC2 += 2;
    }
    return pC1-(unsigned char *)pZ;
  }else{
    UtfString str;
    u32 code = 1;       /* Last character read; 0 means the terminator */
    int big_endian;
    int nRead = 0;      /* Number of characters read so far */
    int ret;

    /* The byte length is what is being computed, so it is not known yet.
    ** str.n is set to -1 and the loop below relies on nChar and on the
    ** 0x0000 terminator to stop.
    */
    str.pZ = (char *)pZ;
    str.c = 0;
    str.n = -1;

    /* Check for a BOM. We just ignore it if there is one, it's only read
    ** so that it is not counted as a character.
    **
    ** NOTE(review): readUtf16Bom() only looks for a BOM when str.n>1, and
    ** str.n is -1 here, so as written this call consumes nothing and
    ** returns its second argument (0, little-endian). Confirm whether BOM
    ** skipping is really wanted before changing this, because it alters
    ** the value returned to callers.
    */
    big_endian = readUtf16Bom(&str, 0);
    ret = 0-str.c;      /* Bytes consumed by a BOM are not part of the result */

    while( code!=0 && nRead<nChar ){
      code = readUtf16(&str, big_endian);
      nRead++;
    }
    if( code==0 ){
      /* Do not include the two terminator bytes that were just read */
      ret -= 2;
    }
    return str.c + ret;
  }
}
396
drha5d14fe2004-05-04 15:00:46 +0000397/*
398** Convert a string in UTF-16 native byte (or with a Byte-order-mark or
399** "BOM") into a UTF-8 string. The UTF-8 string is written into space
danielk1977998b56c2004-05-06 23:37:52 +0000400** obtained from sqlite3Malloc() and must be released by the calling function.
drha5d14fe2004-05-04 15:00:46 +0000401**
402** The parameter N is the number of bytes in the UTF-16 string. If N is
403** negative, the entire string up to the first \u0000 character is translated.
404**
405** The returned UTF-8 string is always \000 terminated.
406*/
danielk1977b1bc9532004-05-22 03:05:33 +0000407unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian){
danielk1977998b56c2004-05-06 23:37:52 +0000408 UtfString in;
409 UtfString out;
danielk1977998b56c2004-05-06 23:37:52 +0000410
411 out.pZ = 0;
412
413 in.pZ = (unsigned char *)pData;
414 in.n = N;
415 in.c = 0;
416
417 if( in.n<0 ){
danielk19776622cce2004-05-20 11:00:52 +0000418 in.n = sqlite3utf16ByteLen(in.pZ, -1);
danielk1977998b56c2004-05-06 23:37:52 +0000419 }
420
421 /* A UTF-8 encoding of a unicode string can require at most 1.5 times as
422 ** much space to store as the same string encoded using UTF-16. Allocate
423 ** this now.
424 */
425 out.n = (in.n*1.5) + 1;
danielk1977295ba552004-05-19 10:34:51 +0000426 out.pZ = sqliteMalloc(out.n);
danielk1977998b56c2004-05-06 23:37:52 +0000427 if( !out.pZ ){
428 return 0;
429 }
430 out.c = 0;
431
danielk1977b1bc9532004-05-22 03:05:33 +0000432 big_endian = readUtf16Bom(&in, big_endian);
danielk1977998b56c2004-05-06 23:37:52 +0000433 while( in.c<in.n ){
434 writeUtf8(&out, readUtf16(&in, big_endian));
435 }
436
437 /* Add the NULL-terminator character */
438 assert( out.c<out.n );
439 out.pZ[out.c] = 0x00;
440
441 return out.pZ;
442}
443
444static void *utf8toUtf16(const unsigned char *pIn, int N, int big_endian){
445 UtfString in;
446 UtfString out;
447
448 in.pZ = (unsigned char *)pIn;
449 in.n = N;
450 in.c = 0;
451
452 if( in.n<0 ){
453 in.n = strlen(in.pZ);
454 }
455
456 /* A UTF-16 encoding of a unicode string can require at most twice as
457 ** much space to store as the same string encoded using UTF-8. Allocate
458 ** this now.
459 */
460 out.n = (in.n*2) + 2;
danielk1977295ba552004-05-19 10:34:51 +0000461 out.pZ = sqliteMalloc(out.n);
danielk1977998b56c2004-05-06 23:37:52 +0000462 if( !out.pZ ){
463 return 0;
464 }
465 out.c = 0;
466
467 while( in.c<in.n ){
468 writeUtf16(&out, readUtf8(&in), big_endian);
469 }
470
471 /* Add the NULL-terminator character */
472 assert( (out.c+1)<out.n );
473 out.pZ[out.c] = 0x00;
474 out.pZ[out.c+1] = 0x00;
475
476 return out.pZ;
477}
478
/*
** Translate the UTF-8 string pIn to UTF-16BE.
**
** N is the number of bytes in pIn; a negative N means pIn is
** zero-terminated. The arguments are passed straight to utf8toUtf16()
** with big_endian set to 1, and its result is returned: a newly allocated
** buffer, or zero if the allocation failed.
*/
void *sqlite3utf8to16be(const unsigned char *pIn, int N){
  return utf8toUtf16(pIn, N, 1);
}
485
/*
** Translate the UTF-8 string pIn to UTF-16LE.
**
** N is the number of bytes in pIn; a negative N means pIn is
** zero-terminated. The arguments are passed straight to utf8toUtf16()
** with big_endian set to 0, and its result is returned: a newly allocated
** buffer, or zero if the allocation failed.
*/
void *sqlite3utf8to16le(const unsigned char *pIn, int N){
  return utf8toUtf16(pIn, N, 0);
}
489
490/*
491** This routine does the work for sqlite3utf16to16le() and
492** sqlite3utf16to16be(). If big_endian is 1 the input string is
493** transformed in place to UTF-16BE encoding. If big_endian is 0 then
494** the input is transformed to UTF-16LE.
495**
496** Unless the first two bytes of the input string is a BOM, the input is
497** assumed to be UTF-16 encoded using the machines native byte ordering.
498*/
499static void utf16to16(void *pData, int N, int big_endian){
500 UtfString inout;
501 inout.pZ = (unsigned char *)pData;
502 inout.c = 0;
503 inout.n = N;
504
505 if( inout.n<0 ){
danielk19776622cce2004-05-20 11:00:52 +0000506 inout.n = sqlite3utf16ByteLen(inout.pZ, -1);
danielk1977998b56c2004-05-06 23:37:52 +0000507 }
508
drh9c054832004-05-31 18:51:57 +0000509 if( readUtf16Bom(&inout, SQLITE_BIGENDIAN)!=big_endian ){
danielk1977295ba552004-05-19 10:34:51 +0000510 /* swab(&inout.pZ[inout.c], inout.pZ, inout.n-inout.c); */
511 int i;
512 for(i=0; i<(inout.n-inout.c); i += 2){
513 char c1 = inout.pZ[i+inout.c];
514 char c2 = inout.pZ[i+inout.c+1];
515 inout.pZ[i] = c2;
516 inout.pZ[i+1] = c1;
517 }
danielk1977998b56c2004-05-06 23:37:52 +0000518 }else if( inout.c ){
519 memmove(inout.pZ, &inout.pZ[inout.c], inout.n-inout.c);
520 }
danielk1977295ba552004-05-19 10:34:51 +0000521
522 inout.pZ[inout.n-inout.c] = 0x00;
523 inout.pZ[inout.n-inout.c+1] = 0x00;
drha5d14fe2004-05-04 15:00:46 +0000524}
525
/*
** Convert a string in UTF-16 native byte or with a BOM into a UTF-16LE
** string. The conversion occurs in-place. The output overwrites the
** input. N bytes are converted. If N is negative everything is converted
** up to the first \u0000 character.
**
** If the input is already little-endian (native little-endian byte order
** with no BOM, or a little-endian BOM) the bytes of the string are not
** swapped. If there is a BOM at the start of the string, it is removed.
** In every case utf16to16() writes two 0x00 bytes after the string data.
**
** The work, including any byte swapping, is done by utf16to16().
*/
void sqlite3utf16to16le(void *pData, int N){
  utf16to16(pData, N, 0);
}
drha5d14fe2004-05-04 15:00:46 +0000542
/*
** Convert a string in UTF-16 native byte or with a BOM into a UTF-16BE
** string. The conversion occurs in-place. The output overwrites the
** input. N bytes are converted. If N is negative everything is converted
** up to the first \u0000 character.
**
** If the input is already big-endian (native big-endian byte order with
** no BOM, or a big-endian BOM) the bytes of the string are not swapped.
** If there is a BOM at the start of the string, it is removed. In every
** case utf16to16() writes two 0x00 bytes after the string data.
**
** The work, including any byte swapping, is done by utf16to16().
*/
void sqlite3utf16to16be(void *pData, int N){
  utf16to16(pData, N, 1);
}
danielk1977998b56c2004-05-06 23:37:52 +0000559
danielk1977b1bc9532004-05-22 03:05:33 +0000560/*
561** This function is used to translate between UTF-8 and UTF-16. The
562** result is returned in dynamically allocated memory.
563*/
564int sqlite3utfTranslate(
drheb2e1762004-05-27 01:53:56 +0000565 const void *zData, int nData, /* Input string */
566 u8 enc1, /* Encoding of zData */
567 void **zOut, int *nOut, /* Output string */
568 u8 enc2 /* Desired encoding of output */
danielk1977b1bc9532004-05-22 03:05:33 +0000569){
570 assert( enc1==TEXT_Utf8 || enc1==TEXT_Utf16le || enc1==TEXT_Utf16be );
571 assert( enc2==TEXT_Utf8 || enc2==TEXT_Utf16le || enc2==TEXT_Utf16be );
572 assert(
573 (enc1==TEXT_Utf8 && (enc2==TEXT_Utf16le || enc2==TEXT_Utf16be)) ||
574 (enc2==TEXT_Utf8 && (enc1==TEXT_Utf16le || enc1==TEXT_Utf16be))
575 );
danielk19774adee202004-05-08 08:23:19 +0000576
danielk1977b1bc9532004-05-22 03:05:33 +0000577 if( enc1==TEXT_Utf8 ){
578 if( enc2==TEXT_Utf16le ){
579 *zOut = sqlite3utf8to16le(zData, nData);
580 }else{
581 *zOut = sqlite3utf8to16be(zData, nData);
582 }
583 if( !(*zOut) ) return SQLITE_NOMEM;
danielk1977c572ef72004-05-27 09:28:41 +0000584 *nOut = sqlite3utf16ByteLen(*zOut, -1);
danielk1977b1bc9532004-05-22 03:05:33 +0000585 }else{
586 *zOut = sqlite3utf16to8(zData, nData, enc1==TEXT_Utf16be);
587 if( !(*zOut) ) return SQLITE_NOMEM;
danielk1977c572ef72004-05-27 09:28:41 +0000588 *nOut = strlen(*zOut);
danielk1977b1bc9532004-05-22 03:05:33 +0000589 }
590 return SQLITE_OK;
591}