/*
** 2004 April 13
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8,
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.14 2004/05/29 02:44:02 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
**  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
**
**
** Notes on UTF-16:  (with wwww+1==uuuuu)
**
**      Word-0               Word-1          Value
**  110110ww wwzzzzyy   110111yy yyxxxxxx    000uuuuu zzzzyyyy yyxxxxxx
**  zzzzyyyy yyxxxxxx                        00000000 zzzzyyyy yyxxxxxx
**
**
** BOM or Byte Order Mark:
**     0xff 0xfe   little-endian utf-16 follows
**     0xfe 0xff   big-endian utf-16 follows
**
**
** Handling of malformed strings:
**
** SQLite accepts and processes malformed strings without an error wherever
** possible. However this is not possible when converting between UTF-8 and
** UTF-16.
**
** When converting malformed UTF-8 strings to UTF-16, one instance of the
** replacement character U+FFFD is output for each byte that cannot be
** interpreted as part of a valid unicode character.
**
** When converting malformed UTF-16 strings to UTF-8, one instance of the
** replacement character U+FFFD is output for each pair of bytes that cannot
** be interpreted as part of a valid unicode character.
*/
danielk1977998b56c2004-05-06 23:37:52 +000052#include <assert.h>
danielk1977998b56c2004-05-06 23:37:52 +000053#include "sqliteInt.h"
54
/*
** A cursor over a raw byte buffer holding encoded text. The routines in
** this file use it both for the string being read and for the string
** being written.
*/
typedef struct UtfString {
  unsigned char *pZ;    /* Raw string data */
  int n;                /* Allocated length of pZ in bytes */
  int c;                /* Number of pZ bytes already read or written */
} UtfString;
61
/* TODO: Implement this macro in os.h. It should be 1 on big-endian
** machines, and 0 on little-endian.
*/
#define SQLITE3_NATIVE_BIGENDIAN  0

/*
** Values of the two byte-order marks as they appear when the first two
** bytes of a string are combined most-significant-byte first, which is
** how readUtf16Bom() reads them (via BE16()). That read is done one byte
** at a time and so produces the same number on every host; these
** constants therefore must NOT depend on the native byte order. The byte
** sequence 0xFE 0xFF always announces big-endian text and 0xFF 0xFE
** always announces little-endian text.
*/
#define BOM_BIGENDIAN     0x0000FEFF
#define BOM_LITTLEENDIAN  0x0000FFFE
74
/*
** These two macros are used to interpret the first two bytes of the
** unsigned char array pZ as a 16-bit unsigned int. BE16() for a big-endian
** interpretation, LE16() for little-endian.
**
** The argument is evaluated twice, so it must not have side effects.
*/
#define BE16(pZ)  (((u16)((pZ)[0])<<8) + (u16)((pZ)[1]))
#define LE16(pZ)  (((u16)((pZ)[1])<<8) + (u16)((pZ)[0]))

/*
** READ_16 interprets the first two bytes of the unsigned char array pZ
** as a 16-bit unsigned int. If big_endian is non-zero the interpretation
** is big-endian, otherwise little-endian.
**
** big_endian is parenthesized in the expansion so that any expression
** (including one that itself contains ?:) can be passed safely.
*/
#define READ_16(pZ,big_endian) ((big_endian)?BE16(pZ):LE16(pZ))
89
90/*
91** Read the BOM from the start of *pStr, if one is present. Return zero
92** for little-endian, non-zero for big-endian. If no BOM is present, return
danielk1977b1bc9532004-05-22 03:05:33 +000093** the value of the parameter "big_endian".
danielk1977998b56c2004-05-06 23:37:52 +000094**
95** Return values:
96** 1 -> big-endian string
97** 0 -> little-endian string
98*/
danielk1977b1bc9532004-05-22 03:05:33 +000099static int readUtf16Bom(UtfString *pStr, int big_endian){
danielk1977998b56c2004-05-06 23:37:52 +0000100 /* The BOM must be the first thing read from the string */
101 assert( pStr->c==0 );
102
103 /* If the string data consists of 1 byte or less, the BOM will make no
104 ** difference anyway. In this case just fall through to the default case
105 ** and return the native byte-order for this machine.
106 **
107 ** Otherwise, check the first 2 bytes of the string to see if a BOM is
108 ** present.
109 */
110 if( pStr->n>1 ){
111 u32 bom = BE16(pStr->pZ);
112 if( bom==BOM_BIGENDIAN ){
113 pStr->c = 2;
114 return 1;
115 }
116 if( bom==BOM_LITTLEENDIAN ){
117 pStr->c = 2;
118 return 0;
119 }
120 }
121
danielk1977b1bc9532004-05-22 03:05:33 +0000122 return big_endian;
danielk1977998b56c2004-05-06 23:37:52 +0000123}
124
danielk197793d46752004-05-23 13:30:58 +0000125/*
126** zData is a UTF-16 encoded string, nData bytes in length. This routine
127** checks if there is a byte-order mark at the start of zData. If no
128** byte order mark is found 0 is returned. Otherwise TEXT_Utf16be or
129** TEXT_Utf16le is returned, depending on whether The BOM indicates that
130** the text is big-endian or little-endian.
131*/
132u8 sqlite3UtfReadBom(const void *zData, int nData){
133 if( nData<0 || nData>1 ){
134 u8 b1 = *(u8 *)zData;
135 u8 b2 = *(((u8 *)zData) + 1);
136 if( b1==0xFE && b2==0xFF ){
137 return TEXT_Utf16be;
138 }
139 if( b1==0xFF && b2==0xFE ){
140 return TEXT_Utf16le;
141 }
142 }
143 return 0;
144}
145
danielk1977998b56c2004-05-06 23:37:52 +0000146
147/*
148** Read a single unicode character from the UTF-8 encoded string *pStr. The
149** value returned is a unicode scalar value. In the case of malformed
150** strings, the unicode replacement character U+FFFD may be returned.
151*/
152static u32 readUtf8(UtfString *pStr){
153 struct Utf8TblRow {
154 u8 b1_mask;
155 u8 b1_masked_val;
156 u8 b1_value_mask;
157 int trailing_bytes;
158 };
159 static const struct Utf8TblRow utf8tbl[] = {
160 { 0x80, 0x00, 0x7F, 0 },
161 { 0xE0, 0xC0, 0x1F, 1 },
162 { 0xF0, 0xE0, 0x0F, 2 },
163 { 0xF8, 0xF0, 0x0E, 3 },
164 { 0, 0, 0, 0}
165 };
166
167 u8 b1; /* First byte of the potentially multi-byte utf-8 character */
168 u32 ret = 0; /* Return value */
169 int ii;
170 struct Utf8TblRow const *pRow;
171
172 pRow = &(utf8tbl[0]);
173
174 b1 = pStr->pZ[pStr->c];
175 pStr->c++;
176 while( pRow->b1_mask && (b1&pRow->b1_mask)!=pRow->b1_masked_val ){
177 pRow++;
178 }
179 if( !pRow->b1_mask ){
180 return 0xFFFD;
181 }
182
183 ret = (u32)(b1&pRow->b1_value_mask);
184 for( ii=0; ii<pRow->trailing_bytes; ii++ ){
185 u8 b = pStr->pZ[pStr->c+ii];
186 if( (b&0xC0)!=0x80 ){
187 return 0xFFFD;
188 }
189 ret = (ret<<6) + (u32)(b&0x3F);
190 }
191
192 pStr->c += pRow->trailing_bytes;
193 return ret;
194}
195
196/*
197** Write the unicode character 'code' to the string pStr using UTF-8
198** encoding. SQLITE_NOMEM may be returned if sqlite3Malloc() fails.
199*/
200static int writeUtf8(UtfString *pStr, u32 code){
201 struct Utf8WriteTblRow {
202 u32 max_code;
203 int trailing_bytes;
204 u8 b1_and_mask;
205 u8 b1_or_mask;
206 };
207 static const struct Utf8WriteTblRow utf8tbl[] = {
208 {0x0000007F, 0, 0x7F, 0x00},
209 {0x000007FF, 1, 0xDF, 0xC0},
210 {0x0000FFFF, 2, 0xEF, 0xE0},
211 {0x0010FFFF, 3, 0xF7, 0xF0},
212 {0x00000000, 0, 0x00, 0x00}
213 };
danielk19776622cce2004-05-20 11:00:52 +0000214 const struct Utf8WriteTblRow *pRow = &utf8tbl[0];
danielk1977998b56c2004-05-06 23:37:52 +0000215
danielk1977295ba552004-05-19 10:34:51 +0000216 while( code>pRow->max_code ){
danielk1977998b56c2004-05-06 23:37:52 +0000217 assert( pRow->max_code );
218 pRow++;
219 }
220
221 /* Ensure there is enough room left in the output buffer to write
222 ** this UTF-8 character.
223 */
224 assert( (pStr->n-pStr->c)>=(pRow->trailing_bytes+1) );
225
226 /* Write the UTF-8 encoded character to pStr. All cases below are
227 ** intentionally fall-through.
228 */
229 switch( pRow->trailing_bytes ){
230 case 3:
231 pStr->pZ[pStr->c+3] = (((u8)code)&0x3F)|0x80;
232 code = code>>6;
233 case 2:
234 pStr->pZ[pStr->c+2] = (((u8)code)&0x3F)|0x80;
235 code = code>>6;
236 case 1:
237 pStr->pZ[pStr->c+1] = (((u8)code)&0x3F)|0x80;
238 code = code>>6;
239 case 0:
240 pStr->pZ[pStr->c] = (((u8)code)&(pRow->b1_and_mask))|(pRow->b1_or_mask);
241 }
242 pStr->c += (pRow->trailing_bytes + 1);
243
244 return 0;
245}
246
247/*
248** Read a single unicode character from the UTF-16 encoded string *pStr. The
249** value returned is a unicode scalar value. In the case of malformed
250** strings, the unicode replacement character U+FFFD may be returned.
251**
252** If big_endian is true, the string is assumed to be UTF-16BE encoded.
253** Otherwise, it is UTF-16LE encoded.
254*/
255static u32 readUtf16(UtfString *pStr, int big_endian){
256 u32 code_point; /* the first code-point in the character */
257
258 /* If there is only one byte of data left in the string, return the
259 ** replacement character.
260 */
261 if( (pStr->n-pStr->c)==1 ){
262 pStr->c++;
263 return (int)0xFFFD;
264 }
265
266 code_point = READ_16(&(pStr->pZ[pStr->c]), big_endian);
267 pStr->c += 2;
268
269 /* If this is a non-surrogate code-point, just cast it to an int and
270 ** return the code-point value.
271 */
272 if( code_point<0xD800 || code_point>0xE000 ){
273 return code_point;
274 }
275
276 /* If this is a trailing surrogate code-point, then the string is
277 ** malformed; return the replacement character.
278 */
279 if( code_point>0xDBFF ){
280 return 0xFFFD;
281 }
282
283 /* The code-point just read is a leading surrogate code-point. If their
284 ** is not enough data left or the next code-point is not a trailing
285 ** surrogate, return the replacement character.
286 */
287 if( (pStr->n-pStr->c)>1 ){
288 u32 code_point2 = READ_16(&pStr->pZ[pStr->c], big_endian);
289 if( code_point2<0xDC00 || code_point>0xDFFF ){
290 return 0xFFFD;
291 }
292 pStr->c += 2;
293
294 return (
295 (((code_point&0x03C0)+0x0040)<<16) + /* uuuuu */
296 ((code_point&0x003F)<<10) + /* xxxxxx */
297 (code_point2&0x03FF) /* yy yyyyyyyy */
298 );
299
300 }else{
301 return (int)0xFFFD;
302 }
303
304 /* not reached */
305}
306
307static int writeUtf16(UtfString *pStr, int code, int big_endian){
308 int bytes;
309 unsigned char *hi_byte;
310 unsigned char *lo_byte;
311
312 bytes = (code>0x0000FFFF?4:2);
313
314 /* Ensure there is enough room left in the output buffer to write
315 ** this UTF-8 character.
316 */
317 assert( (pStr->n-pStr->c)>=bytes );
318
319 /* Initialise hi_byte and lo_byte to point at the locations into which
320 ** the MSB and LSB of the (first) 16-bit unicode code-point written for
321 ** this character.
322 */
323 hi_byte = (big_endian?&pStr->pZ[pStr->c]:&pStr->pZ[pStr->c+1]);
324 lo_byte = (big_endian?&pStr->pZ[pStr->c+1]:&pStr->pZ[pStr->c]);
325
326 if( bytes==2 ){
327 *hi_byte = (u8)((code&0x0000FF00)>>8);
328 *lo_byte = (u8)(code&0x000000FF);
329 }else{
330 u32 wrd;
331 wrd = ((((code&0x001F0000)-0x00010000)+(code&0x0000FC00))>>10)|0x0000D800;
332 *hi_byte = (u8)((wrd&0x0000FF00)>>8);
333 *lo_byte = (u8)(wrd&0x000000FF);
334
335 wrd = (code&0x000003FF)|0x0000DC00;
336 *(hi_byte+2) = (u8)((wrd&0x0000FF00)>>8);
337 *(lo_byte+2) = (u8)(wrd&0x000000FF);
338 }
339
340 pStr->c += bytes;
341
342 return 0;
343}
344
345/*
danielk19776622cce2004-05-20 11:00:52 +0000346** pZ is a UTF-8 encoded unicode string. If nByte is less than zero,
347** return the number of unicode characters in pZ up to (but not including)
348** the first 0x00 byte. If nByte is not less than zero, return the
349** number of unicode characters in the first nByte of pZ (or up to
350** the first 0x00, whichever comes first).
danielk1977998b56c2004-05-06 23:37:52 +0000351*/
danielk19776622cce2004-05-20 11:00:52 +0000352int sqlite3utf8CharLen(const char *pZ, int nByte){
353 UtfString str;
354 int ret = 0;
355 u32 code = 1;
356
357 str.pZ = (char *)pZ;
358 str.n = nByte;
359 str.c = 0;
360
361 while( (nByte<0 || str.c<str.n) && code!=0 ){
362 code = readUtf8(&str);
363 ret++;
danielk1977998b56c2004-05-06 23:37:52 +0000364 }
danielk19776622cce2004-05-20 11:00:52 +0000365 if( code==0 ) ret--;
366
367 return ret;
368}
369
370/*
371** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
372** return the number of bytes up to (but not including), the first pair
373** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
374** then return the number of bytes in the first nChar unicode characters
375** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
376*/
377int sqlite3utf16ByteLen(const void *pZ, int nChar){
378 if( nChar<0 ){
danielk1977e7d00f52004-05-29 02:44:02 +0000379 const unsigned char *pC1 = (unsigned char *)pZ;
380 const unsigned char *pC2 = (unsigned char *)pZ+1;
danielk19776622cce2004-05-20 11:00:52 +0000381 while( *pC1 || *pC2 ){
382 pC1 += 2;
383 pC2 += 2;
384 }
385 return pC1-(unsigned char *)pZ;
386 }else{
387 UtfString str;
388 u32 code = 1;
389 int big_endian;
390 int nRead = 0;
391 int ret;
392
393 str.pZ = (char *)pZ;
394 str.c = 0;
395 str.n = -1;
396
danielk1977b1bc9532004-05-22 03:05:33 +0000397 /* Check for a BOM. We just ignore it if there is one, it's only read
398 ** so that it is not counted as a character.
399 */
400 big_endian = readUtf16Bom(&str, 0);
danielk19776622cce2004-05-20 11:00:52 +0000401 ret = 0-str.c;
402
403 while( code!=0 && nRead<nChar ){
404 code = readUtf16(&str, big_endian);
405 nRead++;
406 }
407 if( code==0 ){
408 ret -= 2;
409 }
410 return str.c + ret;
411 }
danielk1977998b56c2004-05-06 23:37:52 +0000412}
413
drha5d14fe2004-05-04 15:00:46 +0000414/*
415** Convert a string in UTF-16 native byte (or with a Byte-order-mark or
416** "BOM") into a UTF-8 string. The UTF-8 string is written into space
danielk1977998b56c2004-05-06 23:37:52 +0000417** obtained from sqlite3Malloc() and must be released by the calling function.
drha5d14fe2004-05-04 15:00:46 +0000418**
419** The parameter N is the number of bytes in the UTF-16 string. If N is
420** negative, the entire string up to the first \u0000 character is translated.
421**
422** The returned UTF-8 string is always \000 terminated.
423*/
danielk1977b1bc9532004-05-22 03:05:33 +0000424unsigned char *sqlite3utf16to8(const void *pData, int N, int big_endian){
danielk1977998b56c2004-05-06 23:37:52 +0000425 UtfString in;
426 UtfString out;
danielk1977998b56c2004-05-06 23:37:52 +0000427
428 out.pZ = 0;
429
430 in.pZ = (unsigned char *)pData;
431 in.n = N;
432 in.c = 0;
433
434 if( in.n<0 ){
danielk19776622cce2004-05-20 11:00:52 +0000435 in.n = sqlite3utf16ByteLen(in.pZ, -1);
danielk1977998b56c2004-05-06 23:37:52 +0000436 }
437
438 /* A UTF-8 encoding of a unicode string can require at most 1.5 times as
439 ** much space to store as the same string encoded using UTF-16. Allocate
440 ** this now.
441 */
442 out.n = (in.n*1.5) + 1;
danielk1977295ba552004-05-19 10:34:51 +0000443 out.pZ = sqliteMalloc(out.n);
danielk1977998b56c2004-05-06 23:37:52 +0000444 if( !out.pZ ){
445 return 0;
446 }
447 out.c = 0;
448
danielk1977b1bc9532004-05-22 03:05:33 +0000449 big_endian = readUtf16Bom(&in, big_endian);
danielk1977998b56c2004-05-06 23:37:52 +0000450 while( in.c<in.n ){
451 writeUtf8(&out, readUtf16(&in, big_endian));
452 }
453
454 /* Add the NULL-terminator character */
455 assert( out.c<out.n );
456 out.pZ[out.c] = 0x00;
457
458 return out.pZ;
459}
460
461static void *utf8toUtf16(const unsigned char *pIn, int N, int big_endian){
462 UtfString in;
463 UtfString out;
464
465 in.pZ = (unsigned char *)pIn;
466 in.n = N;
467 in.c = 0;
468
469 if( in.n<0 ){
470 in.n = strlen(in.pZ);
471 }
472
473 /* A UTF-16 encoding of a unicode string can require at most twice as
474 ** much space to store as the same string encoded using UTF-8. Allocate
475 ** this now.
476 */
477 out.n = (in.n*2) + 2;
danielk1977295ba552004-05-19 10:34:51 +0000478 out.pZ = sqliteMalloc(out.n);
danielk1977998b56c2004-05-06 23:37:52 +0000479 if( !out.pZ ){
480 return 0;
481 }
482 out.c = 0;
483
484 while( in.c<in.n ){
485 writeUtf16(&out, readUtf8(&in), big_endian);
486 }
487
488 /* Add the NULL-terminator character */
489 assert( (out.c+1)<out.n );
490 out.pZ[out.c] = 0x00;
491 out.pZ[out.c+1] = 0x00;
492
493 return out.pZ;
494}
495
/*
** Translate the UTF-8 string pIn (N bytes, or up to the first 0x00 byte
** if N is negative) into UTF-16BE. This is utf8toUtf16() with the
** big-endian flag set; its return value is passed back unchanged.
*/
void *sqlite3utf8to16be(const unsigned char *pIn, int N){
  void *zUtf16be = utf8toUtf16(pIn, N, 1);
  return zUtf16be;
}
502
/*
** Translate the UTF-8 string pIn (N bytes, or up to the first 0x00 byte
** if N is negative) into UTF-16LE. This is utf8toUtf16() with the
** big-endian flag clear; its return value is passed back unchanged.
*/
void *sqlite3utf8to16le(const unsigned char *pIn, int N){
  void *zUtf16le = utf8toUtf16(pIn, N, 0);
  return zUtf16le;
}
506
507/*
508** This routine does the work for sqlite3utf16to16le() and
509** sqlite3utf16to16be(). If big_endian is 1 the input string is
510** transformed in place to UTF-16BE encoding. If big_endian is 0 then
511** the input is transformed to UTF-16LE.
512**
513** Unless the first two bytes of the input string is a BOM, the input is
514** assumed to be UTF-16 encoded using the machines native byte ordering.
515*/
516static void utf16to16(void *pData, int N, int big_endian){
517 UtfString inout;
518 inout.pZ = (unsigned char *)pData;
519 inout.c = 0;
520 inout.n = N;
521
522 if( inout.n<0 ){
danielk19776622cce2004-05-20 11:00:52 +0000523 inout.n = sqlite3utf16ByteLen(inout.pZ, -1);
danielk1977998b56c2004-05-06 23:37:52 +0000524 }
525
danielk1977b1bc9532004-05-22 03:05:33 +0000526 if( readUtf16Bom(&inout, SQLITE3_BIGENDIAN)!=big_endian ){
danielk1977295ba552004-05-19 10:34:51 +0000527 /* swab(&inout.pZ[inout.c], inout.pZ, inout.n-inout.c); */
528 int i;
529 for(i=0; i<(inout.n-inout.c); i += 2){
530 char c1 = inout.pZ[i+inout.c];
531 char c2 = inout.pZ[i+inout.c+1];
532 inout.pZ[i] = c2;
533 inout.pZ[i+1] = c1;
534 }
danielk1977998b56c2004-05-06 23:37:52 +0000535 }else if( inout.c ){
536 memmove(inout.pZ, &inout.pZ[inout.c], inout.n-inout.c);
537 }
danielk1977295ba552004-05-19 10:34:51 +0000538
539 inout.pZ[inout.n-inout.c] = 0x00;
540 inout.pZ[inout.n-inout.c+1] = 0x00;
drha5d14fe2004-05-04 15:00:46 +0000541}
542
/*
** Convert a UTF-16 string (optionally starting with a BOM) into a
** UTF-16LE string. The conversion occurs in-place: the output overwrites
** the input. N bytes are converted. If N is negative everything is
** converted up to the first \u0000 character.
**
** All of the work is done by utf16to16(), called with a little-endian
** target. If there is a BOM at the start of the string, it is removed.
*/
void sqlite3utf16to16le(void *pData, int N){
  const int wantBigEndian = 0;
  utf16to16(pData, N, wantBigEndian);
}
drha5d14fe2004-05-04 15:00:46 +0000559
/*
** Convert a UTF-16 string (optionally starting with a BOM) into a
** UTF-16BE string. The conversion occurs in-place: the output overwrites
** the input. N bytes are converted. If N is negative everything is
** converted up to the first \u0000 character.
**
** All of the work is done by utf16to16(), called with a big-endian
** target. If there is a BOM at the start of the string, it is removed.
*/
void sqlite3utf16to16be(void *pData, int N){
  const int wantBigEndian = 1;
  utf16to16(pData, N, wantBigEndian);
}
danielk1977998b56c2004-05-06 23:37:52 +0000576
danielk1977b1bc9532004-05-22 03:05:33 +0000577/*
578** This function is used to translate between UTF-8 and UTF-16. The
579** result is returned in dynamically allocated memory.
580*/
581int sqlite3utfTranslate(
drheb2e1762004-05-27 01:53:56 +0000582 const void *zData, int nData, /* Input string */
583 u8 enc1, /* Encoding of zData */
584 void **zOut, int *nOut, /* Output string */
585 u8 enc2 /* Desired encoding of output */
danielk1977b1bc9532004-05-22 03:05:33 +0000586){
587 assert( enc1==TEXT_Utf8 || enc1==TEXT_Utf16le || enc1==TEXT_Utf16be );
588 assert( enc2==TEXT_Utf8 || enc2==TEXT_Utf16le || enc2==TEXT_Utf16be );
589 assert(
590 (enc1==TEXT_Utf8 && (enc2==TEXT_Utf16le || enc2==TEXT_Utf16be)) ||
591 (enc2==TEXT_Utf8 && (enc1==TEXT_Utf16le || enc1==TEXT_Utf16be))
592 );
danielk19774adee202004-05-08 08:23:19 +0000593
danielk1977b1bc9532004-05-22 03:05:33 +0000594 if( enc1==TEXT_Utf8 ){
595 if( enc2==TEXT_Utf16le ){
596 *zOut = sqlite3utf8to16le(zData, nData);
597 }else{
598 *zOut = sqlite3utf8to16be(zData, nData);
599 }
600 if( !(*zOut) ) return SQLITE_NOMEM;
danielk1977c572ef72004-05-27 09:28:41 +0000601 *nOut = sqlite3utf16ByteLen(*zOut, -1);
danielk1977b1bc9532004-05-22 03:05:33 +0000602 }else{
603 *zOut = sqlite3utf16to8(zData, nData, enc1==TEXT_Utf16be);
604 if( !(*zOut) ) return SQLITE_NOMEM;
danielk1977c572ef72004-05-27 09:28:41 +0000605 *nOut = strlen(*zOut);
danielk1977b1bc9532004-05-22 03:05:33 +0000606 }
607 return SQLITE_OK;
608}