blob: ff3fa6f5539d4b7b7e4785ab9d8946069790279a [file] [log] [blame]
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the utf.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library. Specifically, the code in this file
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
** $Id: test5.c,v 1.8 2004/06/04 06:22:02 danielk1977 Exp $
*/
20#include "sqliteInt.h"
drh9c054832004-05-31 18:51:57 +000021#include "os.h" /* to get SQLITE_BIGENDIAN */
danielk1977998b56c2004-05-06 23:37:52 +000022#include "tcl.h"
23#include <stdlib.h>
24#include <string.h>
25
/*
** Measure a buffer that is terminated by an aligned pair of 0x00 bytes.
**
** The buffer at pZ is examined two bytes at a time, starting at offset 0.
** The return value is the number of bytes up to and including the first
** pair in which both bytes are zero.  The caller must guarantee that such
** a pair exists; no upper bound is applied to the scan.
*/
static int utf16_length(const unsigned char *pZ){
  int nByte = 0;     /* Offset of the two-byte unit currently examined */

  while( pZ[nByte]!=0 || pZ[nByte+1]!=0 ){
    nByte += 2;
  }

  /* Count the terminating pair as part of the length. */
  return nByte + 2;
}
39
drhdf014892004-06-02 00:41:09 +000040/*
41** tclcmd: sqlite_utf8to16le STRING
42** title: Convert STRING from utf-8 to utf-16le
43**
44** Return the utf-16le encoded string
45*/
danielk1977998b56c2004-05-06 23:37:52 +000046static int sqlite_utf8to16le(
47 void * clientData,
48 Tcl_Interp *interp,
49 int objc,
50 Tcl_Obj *CONST objv[]
51){
52 unsigned char *out;
53 unsigned char *in;
54 Tcl_Obj *res;
55
56 if( objc!=2 ){
57 Tcl_AppendResult(interp, "wrong # args: should be \"",
58 Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
59 return TCL_ERROR;
60 }
61
danielk1977295ba552004-05-19 10:34:51 +000062 in = Tcl_GetString(objv[1]);
danielk1977998b56c2004-05-06 23:37:52 +000063 out = (unsigned char *)sqlite3utf8to16le(in, -1);
danielk1977697e6e22004-05-07 01:50:37 +000064 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977295ba552004-05-19 10:34:51 +000065 sqliteFree(out);
danielk1977998b56c2004-05-06 23:37:52 +000066
67 Tcl_SetObjResult(interp, res);
68
69 return TCL_OK;
70}
71
drhdf014892004-06-02 00:41:09 +000072/*
73** tclcmd: sqlite_utf8to16be STRING
74** title: Convert STRING from utf-8 to utf-16be
75**
76** Return the utf-16be encoded string
77*/
danielk1977998b56c2004-05-06 23:37:52 +000078static int sqlite_utf8to16be(
79 void * clientData,
80 Tcl_Interp *interp,
81 int objc,
82 Tcl_Obj *CONST objv[]
83){
84 unsigned char *out;
85 unsigned char *in;
86 Tcl_Obj *res;
87
88 if( objc!=2 ){
89 Tcl_AppendResult(interp, "wrong # args: should be \"",
90 Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
91 return TCL_ERROR;
92 }
93
94 in = Tcl_GetByteArrayFromObj(objv[1], 0);
danielk1977295ba552004-05-19 10:34:51 +000095 in = Tcl_GetString(objv[1]);
danielk1977998b56c2004-05-06 23:37:52 +000096 out = (unsigned char *)sqlite3utf8to16be(in, -1);
danielk1977697e6e22004-05-07 01:50:37 +000097 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977998b56c2004-05-06 23:37:52 +000098 sqliteFree(out);
99
100 Tcl_SetObjResult(interp, res);
101
102 return TCL_OK;
103}
104
drhdf014892004-06-02 00:41:09 +0000105/*
106** tclcmd: sqlite_utf16to16le STRING
107** title: Convert STRING from utf-16 in native byte order to utf-16le
108**
109** Return the utf-16le encoded string. If the input string contains
110** a byte-order mark, then the byte order mark should override the
111** native byte order.
112*/
danielk1977998b56c2004-05-06 23:37:52 +0000113static int sqlite_utf16to16le(
114 void * clientData,
115 Tcl_Interp *interp,
116 int objc,
117 Tcl_Obj *CONST objv[]
118){
119 unsigned char *out;
120 unsigned char *in;
121 int in_len;
122 Tcl_Obj *res;
123
124 if( objc!=2 ){
125 Tcl_AppendResult(interp, "wrong # args: should be \"",
126 Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
127 return TCL_ERROR;
128 }
129
130 in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
131 out = (unsigned char *)sqliteMalloc(in_len);
132 memcpy(out, in, in_len);
133
134 sqlite3utf16to16le(out, -1);
danielk1977697e6e22004-05-07 01:50:37 +0000135 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977998b56c2004-05-06 23:37:52 +0000136 sqliteFree(out);
137
138 Tcl_SetObjResult(interp, res);
139
140 return TCL_OK;
141}
142
drhdf014892004-06-02 00:41:09 +0000143/*
144** tclcmd: sqlite_utf16to16be STRING
145** title: Convert STRING from utf-16 in native byte order to utf-16be
146**
147** Return the utf-16be encoded string. If the input string contains
148** a byte-order mark, then the byte order mark should override the
149** native byte order.
150*/
danielk1977998b56c2004-05-06 23:37:52 +0000151static int sqlite_utf16to16be(
152 void * clientData,
153 Tcl_Interp *interp,
154 int objc,
155 Tcl_Obj *CONST objv[]
156){
157 unsigned char *out;
158 unsigned char *in;
159 int in_len;
160 Tcl_Obj *res;
161
162 if( objc!=2 ){
163 Tcl_AppendResult(interp, "wrong # args: should be \"",
164 Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
165 return TCL_ERROR;
166 }
167
168 in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
169 out = (unsigned char *)sqliteMalloc(in_len);
170 memcpy(out, in, in_len);
171
172 sqlite3utf16to16be(out, -1);
danielk1977697e6e22004-05-07 01:50:37 +0000173 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977998b56c2004-05-06 23:37:52 +0000174 sqliteFree(out);
175
176 Tcl_SetObjResult(interp, res);
177
178 return TCL_OK;
179}
180
drhdf014892004-06-02 00:41:09 +0000181/*
182** tclcmd: sqlite_utf16to8 STRING
183** title: Convert STRING from utf-16 in native byte order to utf-8
184**
185** Return the utf-8 encoded string. If the input string contains
186** a byte-order mark, then the byte order mark should override the
187** native byte order.
188*/
danielk1977998b56c2004-05-06 23:37:52 +0000189static int sqlite_utf16to8(
190 void * clientData,
191 Tcl_Interp *interp,
192 int objc,
193 Tcl_Obj *CONST objv[]
194){
195 unsigned char *out;
196 unsigned char *in;
197 Tcl_Obj *res;
198
199 if( objc!=2 ){
200 Tcl_AppendResult(interp, "wrong # args: should be \"",
danielk1977295ba552004-05-19 10:34:51 +0000201 Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>", 0);
danielk1977998b56c2004-05-06 23:37:52 +0000202 return TCL_ERROR;
203 }
204
205 in = Tcl_GetByteArrayFromObj(objv[1], 0);
drh9c054832004-05-31 18:51:57 +0000206 out = sqlite3utf16to8(in, -1, SQLITE_BIGENDIAN);
danielk1977295ba552004-05-19 10:34:51 +0000207 res = Tcl_NewByteArrayObj(out, strlen(out)+1);
danielk1977998b56c2004-05-06 23:37:52 +0000208 sqliteFree(out);
209
210 Tcl_SetObjResult(interp, res);
211
212 return TCL_OK;
213}
214
danielk1977295ba552004-05-19 10:34:51 +0000215/*
216** The first argument is a TCL UTF-8 string. Return the byte array
217** object with the encoded representation of the string, including
218** the NULL terminator.
219*/
220static int binarize(
221 void * clientData,
222 Tcl_Interp *interp,
223 int objc,
224 Tcl_Obj *CONST objv[]
225){
226 int len;
227 char *bytes;
228 Tcl_Obj *pRet;
229 assert(objc==2);
230
231 bytes = Tcl_GetStringFromObj(objv[1], &len);
232 pRet = Tcl_NewByteArrayObj(bytes, len+1);
233 Tcl_SetObjResult(interp, pRet);
234 return TCL_OK;
235}
236
danielk1977998b56c2004-05-06 23:37:52 +0000237
238/*
239** Register commands with the TCL interpreter.
240*/
241int Sqlitetest5_Init(Tcl_Interp *interp){
242 static struct {
243 char *zName;
danielk1977295ba552004-05-19 10:34:51 +0000244 Tcl_ObjCmdProc *xProc;
danielk1977998b56c2004-05-06 23:37:52 +0000245 } aCmd[] = {
danielk1977295ba552004-05-19 10:34:51 +0000246 { "sqlite_utf16to8", (Tcl_ObjCmdProc*)sqlite_utf16to8 },
247 { "sqlite_utf8to16le", (Tcl_ObjCmdProc*)sqlite_utf8to16le },
248 { "sqlite_utf8to16be", (Tcl_ObjCmdProc*)sqlite_utf8to16be },
249 { "sqlite_utf16to16le", (Tcl_ObjCmdProc*)sqlite_utf16to16le },
250 { "sqlite_utf16to16be", (Tcl_ObjCmdProc*)sqlite_utf16to16be },
danielk197724162fe2004-06-04 06:22:00 +0000251 { "binarize", (Tcl_ObjCmdProc*)binarize },
danielk1977998b56c2004-05-06 23:37:52 +0000252 };
253 int i;
254 for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
danielk1977295ba552004-05-19 10:34:51 +0000255 Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
danielk1977998b56c2004-05-06 23:37:52 +0000256 }
257
258 return TCL_OK;
259}