blob: ec1ea0ddb479cb31b12a2f0136429d70e4bcbaf5 [file] [log] [blame]
danielk1977998b56c2004-05-06 23:37:52 +00001/*
2** 2001 September 15
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12** Code for testing the utf.c module in SQLite. This code
13** is not included in the SQLite library. It is used for automated
danielk1977295ba552004-05-19 10:34:51 +000014** testing of the SQLite library. Specifically, the code in this file
15** is used for testing the SQLite routines for converting between
16** the various supported unicode encodings.
danielk1977998b56c2004-05-06 23:37:52 +000017**
danielk19770202b292004-06-09 09:55:16 +000018** $Id: test5.c,v 1.9 2004/06/09 09:55:19 danielk1977 Exp $
danielk1977998b56c2004-05-06 23:37:52 +000019*/
20#include "sqliteInt.h"
danielk19770202b292004-06-09 09:55:16 +000021#include "vdbeInt.h"
drh9c054832004-05-31 18:51:57 +000022#include "os.h" /* to get SQLITE_BIGENDIAN */
danielk1977998b56c2004-05-06 23:37:52 +000023#include "tcl.h"
24#include <stdlib.h>
25#include <string.h>
26
/*
** Return the number of bytes in the UTF-16 string pZ, counting up to and
** including the first pair of 0x00 bytes (the two-byte terminator).
**
** The string is examined two bytes at a time starting at pZ[0], so only
** a zero pair that begins at an even offset ends the scan.
**
** If pZ is NULL, 0 is returned instead of dereferencing the pointer.
** Callers in this file pass the result of conversion/allocation routines
** straight into this function.
*/
static int utf16_length(const unsigned char *pZ){
  const unsigned char *p = pZ;
  if( pZ==0 ) return 0;
  while( p[0] || p[1] ){
    p += 2;
  }
  /* +2 accounts for the terminating pair itself. */
  return (int)(p - pZ) + 2;
}
40
drhdf014892004-06-02 00:41:09 +000041/*
42** tclcmd: sqlite_utf8to16le STRING
43** title: Convert STRING from utf-8 to utf-16le
44**
45** Return the utf-16le encoded string
46*/
danielk1977998b56c2004-05-06 23:37:52 +000047static int sqlite_utf8to16le(
48 void * clientData,
49 Tcl_Interp *interp,
50 int objc,
51 Tcl_Obj *CONST objv[]
52){
53 unsigned char *out;
54 unsigned char *in;
55 Tcl_Obj *res;
56
57 if( objc!=2 ){
58 Tcl_AppendResult(interp, "wrong # args: should be \"",
59 Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
60 return TCL_ERROR;
61 }
62
danielk1977295ba552004-05-19 10:34:51 +000063 in = Tcl_GetString(objv[1]);
danielk1977998b56c2004-05-06 23:37:52 +000064 out = (unsigned char *)sqlite3utf8to16le(in, -1);
danielk1977697e6e22004-05-07 01:50:37 +000065 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977295ba552004-05-19 10:34:51 +000066 sqliteFree(out);
danielk1977998b56c2004-05-06 23:37:52 +000067
68 Tcl_SetObjResult(interp, res);
69
70 return TCL_OK;
71}
72
drhdf014892004-06-02 00:41:09 +000073/*
74** tclcmd: sqlite_utf8to16be STRING
75** title: Convert STRING from utf-8 to utf-16be
76**
77** Return the utf-16be encoded string
78*/
danielk1977998b56c2004-05-06 23:37:52 +000079static int sqlite_utf8to16be(
80 void * clientData,
81 Tcl_Interp *interp,
82 int objc,
83 Tcl_Obj *CONST objv[]
84){
85 unsigned char *out;
86 unsigned char *in;
87 Tcl_Obj *res;
88
89 if( objc!=2 ){
90 Tcl_AppendResult(interp, "wrong # args: should be \"",
91 Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0);
92 return TCL_ERROR;
93 }
94
95 in = Tcl_GetByteArrayFromObj(objv[1], 0);
danielk1977295ba552004-05-19 10:34:51 +000096 in = Tcl_GetString(objv[1]);
danielk1977998b56c2004-05-06 23:37:52 +000097 out = (unsigned char *)sqlite3utf8to16be(in, -1);
danielk1977697e6e22004-05-07 01:50:37 +000098 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977998b56c2004-05-06 23:37:52 +000099 sqliteFree(out);
100
101 Tcl_SetObjResult(interp, res);
102
103 return TCL_OK;
104}
105
drhdf014892004-06-02 00:41:09 +0000106/*
107** tclcmd: sqlite_utf16to16le STRING
108** title: Convert STRING from utf-16 in native byte order to utf-16le
109**
110** Return the utf-16le encoded string. If the input string contains
111** a byte-order mark, then the byte order mark should override the
112** native byte order.
113*/
danielk1977998b56c2004-05-06 23:37:52 +0000114static int sqlite_utf16to16le(
115 void * clientData,
116 Tcl_Interp *interp,
117 int objc,
118 Tcl_Obj *CONST objv[]
119){
120 unsigned char *out;
121 unsigned char *in;
122 int in_len;
123 Tcl_Obj *res;
124
125 if( objc!=2 ){
126 Tcl_AppendResult(interp, "wrong # args: should be \"",
127 Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
128 return TCL_ERROR;
129 }
130
131 in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
132 out = (unsigned char *)sqliteMalloc(in_len);
133 memcpy(out, in, in_len);
134
135 sqlite3utf16to16le(out, -1);
danielk1977697e6e22004-05-07 01:50:37 +0000136 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977998b56c2004-05-06 23:37:52 +0000137 sqliteFree(out);
138
139 Tcl_SetObjResult(interp, res);
140
141 return TCL_OK;
142}
143
drhdf014892004-06-02 00:41:09 +0000144/*
145** tclcmd: sqlite_utf16to16be STRING
146** title: Convert STRING from utf-16 in native byte order to utf-16be
147**
148** Return the utf-16be encoded string. If the input string contains
149** a byte-order mark, then the byte order mark should override the
150** native byte order.
151*/
danielk1977998b56c2004-05-06 23:37:52 +0000152static int sqlite_utf16to16be(
153 void * clientData,
154 Tcl_Interp *interp,
155 int objc,
156 Tcl_Obj *CONST objv[]
157){
158 unsigned char *out;
159 unsigned char *in;
160 int in_len;
161 Tcl_Obj *res;
162
163 if( objc!=2 ){
164 Tcl_AppendResult(interp, "wrong # args: should be \"",
165 Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0);
166 return TCL_ERROR;
167 }
168
169 in = Tcl_GetByteArrayFromObj(objv[1], &in_len);
170 out = (unsigned char *)sqliteMalloc(in_len);
171 memcpy(out, in, in_len);
172
173 sqlite3utf16to16be(out, -1);
danielk1977697e6e22004-05-07 01:50:37 +0000174 res = Tcl_NewByteArrayObj(out, utf16_length(out));
danielk1977998b56c2004-05-06 23:37:52 +0000175 sqliteFree(out);
176
177 Tcl_SetObjResult(interp, res);
178
179 return TCL_OK;
180}
181
drhdf014892004-06-02 00:41:09 +0000182/*
183** tclcmd: sqlite_utf16to8 STRING
184** title: Convert STRING from utf-16 in native byte order to utf-8
185**
186** Return the utf-8 encoded string. If the input string contains
187** a byte-order mark, then the byte order mark should override the
188** native byte order.
189*/
danielk1977998b56c2004-05-06 23:37:52 +0000190static int sqlite_utf16to8(
191 void * clientData,
192 Tcl_Interp *interp,
193 int objc,
194 Tcl_Obj *CONST objv[]
195){
196 unsigned char *out;
197 unsigned char *in;
198 Tcl_Obj *res;
199
200 if( objc!=2 ){
201 Tcl_AppendResult(interp, "wrong # args: should be \"",
danielk1977295ba552004-05-19 10:34:51 +0000202 Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>", 0);
danielk1977998b56c2004-05-06 23:37:52 +0000203 return TCL_ERROR;
204 }
205
206 in = Tcl_GetByteArrayFromObj(objv[1], 0);
drh9c054832004-05-31 18:51:57 +0000207 out = sqlite3utf16to8(in, -1, SQLITE_BIGENDIAN);
danielk1977295ba552004-05-19 10:34:51 +0000208 res = Tcl_NewByteArrayObj(out, strlen(out)+1);
danielk1977998b56c2004-05-06 23:37:52 +0000209 sqliteFree(out);
210
211 Tcl_SetObjResult(interp, res);
212
213 return TCL_OK;
214}
215
danielk1977295ba552004-05-19 10:34:51 +0000216/*
217** The first argument is a TCL UTF-8 string. Return the byte array
218** object with the encoded representation of the string, including
219** the NULL terminator.
220*/
221static int binarize(
222 void * clientData,
223 Tcl_Interp *interp,
224 int objc,
225 Tcl_Obj *CONST objv[]
226){
227 int len;
228 char *bytes;
229 Tcl_Obj *pRet;
230 assert(objc==2);
231
232 bytes = Tcl_GetStringFromObj(objv[1], &len);
233 pRet = Tcl_NewByteArrayObj(bytes, len+1);
234 Tcl_SetObjResult(interp, pRet);
235 return TCL_OK;
236}
237
danielk19770202b292004-06-09 09:55:16 +0000238/*
239** Usage: test_value_overhead <repeat-count> <do-calls>.
240**
241** This routine is used to test the overhead of calls to
242** sqlite3_value_text(), on a value that contains a UTF-8 string. The idea
243** is to figure out whether or not it is a problem to use sqlite3_value
244** structures with collation sequence functions.
245**
246** If <do-calls> is 0, then the calls to sqlite3_value_text() are not
247** actually made.
248*/
249static int test_value_overhead(
250 void * clientData,
251 Tcl_Interp *interp,
252 int objc,
253 Tcl_Obj *CONST objv[]
254){
255 int do_calls;
256 int repeat_count;
257 int i;
258 Mem val;
259 const char *zVal;
260
261 if( objc!=3 ){
262 Tcl_AppendResult(interp, "wrong # args: should be \"",
263 Tcl_GetStringFromObj(objv[0], 0), " <repeat-count> <do-calls>", 0);
264 return TCL_ERROR;
265 }
266
267 if( Tcl_GetIntFromObj(interp, objv[1], &repeat_count) ) return TCL_ERROR;
268 if( Tcl_GetIntFromObj(interp, objv[2], &do_calls) ) return TCL_ERROR;
269
270 val.flags = MEM_Str|MEM_Term|MEM_Static;
271 val.z = "hello world";
272 val.type = SQLITE_TEXT;
273 val.enc = TEXT_Utf8;
274
275 for(i=0; i<repeat_count; i++){
276 if( do_calls ){
277 zVal = sqlite3_value_text(&val);
278 }
279 }
280
281 return TCL_OK;
282}
283
danielk1977998b56c2004-05-06 23:37:52 +0000284
285/*
286** Register commands with the TCL interpreter.
287*/
288int Sqlitetest5_Init(Tcl_Interp *interp){
289 static struct {
290 char *zName;
danielk1977295ba552004-05-19 10:34:51 +0000291 Tcl_ObjCmdProc *xProc;
danielk1977998b56c2004-05-06 23:37:52 +0000292 } aCmd[] = {
danielk1977295ba552004-05-19 10:34:51 +0000293 { "sqlite_utf16to8", (Tcl_ObjCmdProc*)sqlite_utf16to8 },
294 { "sqlite_utf8to16le", (Tcl_ObjCmdProc*)sqlite_utf8to16le },
295 { "sqlite_utf8to16be", (Tcl_ObjCmdProc*)sqlite_utf8to16be },
296 { "sqlite_utf16to16le", (Tcl_ObjCmdProc*)sqlite_utf16to16le },
297 { "sqlite_utf16to16be", (Tcl_ObjCmdProc*)sqlite_utf16to16be },
danielk197724162fe2004-06-04 06:22:00 +0000298 { "binarize", (Tcl_ObjCmdProc*)binarize },
danielk19770202b292004-06-09 09:55:16 +0000299 { "test_value_overhead", (Tcl_ObjCmdProc*)test_value_overhead },
danielk1977998b56c2004-05-06 23:37:52 +0000300 };
301 int i;
302 for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
danielk1977295ba552004-05-19 10:34:51 +0000303 Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
danielk1977998b56c2004-05-06 23:37:52 +0000304 }
danielk19770202b292004-06-09 09:55:16 +0000305 return SQLITE_OK;
danielk1977998b56c2004-05-06 23:37:52 +0000306}
danielk19770202b292004-06-09 09:55:16 +0000307