blob: 5c24bbb7f65f634365a5c0b4c49c3289b752811c [file] [log] [blame]
danielk197728d47b52004-05-22 08:16:11 +00001# 2002 May 24
2#
3# The author disclaims copyright to this source code. In place of
4# a legal notice, here is a blessing:
5#
6# May you do good and not evil.
7# May you find forgiveness for yourself and forgive others.
8# May you share freely, never taking more than you give.
9#
10#***********************************************************************
11# This file implements regression tests for SQLite library. The focus of
12# this file is testing the SQLite routines used for converting between the
13# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
14# UTF-16be).
15#
danielk19777677c0c2007-05-23 16:23:09 +000016# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
danielk197728d47b52004-05-22 08:16:11 +000017
18set testdir [file dirname $argv0]
19source $testdir/tester.tcl
20
drh6c626082004-11-14 21:56:29 +000021# Skip this test if the build does not support multiple encodings.
22#
23ifcapable {!utf16} {
24 finish_test
25 return
26}
27
danielk197728d47b52004-05-22 08:16:11 +000028proc do_bincmp_test {testname got expect} {
29 binary scan $expect \c* expectvals
30 binary scan $got \c* gotvals
31 do_test $testname [list set dummy $gotvals] $expectvals
32}
33
34# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
35# to change the byte-order of the string.
36proc swap_byte_order {utf16} {
37 binary scan $utf16 \c* ints
38
39 foreach {a b} $ints {
40 lappend ints2 $b
41 lappend ints2 $a
42 }
43
44 return [binary format \c* $ints2]
45}
46
47#
48# Test that the SQLite routines for converting between UTF encodings
49# produce the same results as their TCL counterparts.
50#
51# $testname is the prefix to be used for the test names.
52# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
53#
54# The test procedure is:
55# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
56# SQLite routines produce the same results.
57#
58# 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
59# SQLite routines produce the same results.
60#
61# 3. Use the SQLite routines to convert the native machine order UTF-16
62# representation back to the original UTF-8. Check that the result
63# matches the original representation.
64#
65# 4. Add a byte-order mark to each of the UTF-16 representations and
66# check that the SQLite routines can convert them back to UTF-8. For
67# byte-order mark info, refer to section 3.10 of the unicode standard.
68#
69# 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
70# that SQLite can convert them both to native byte order UTF-16
71# strings, sans BOM.
72#
73# Coverage:
74#
75# sqlite_utf8to16be (step 2)
76# sqlite_utf8to16le (step 1)
77# sqlite_utf16to8 (steps 3, 4)
78# sqlite_utf16to16le (step 5)
79# sqlite_utf16to16be (step 5)
80#
81proc test_conversion {testname str} {
82
83 # Step 1.
drhef4ac8f2004-06-19 00:16:31 +000084 set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
danielk197728d47b52004-05-22 08:16:11 +000085 set utf16le_tcl [encoding convertto unicode $str]
86 append utf16le_tcl "\x00\x00"
87 if { $::tcl_platform(byteOrder)!="littleEndian" } {
88 set utf16le_tcl [swap_byte_order $utf16le_tcl]
89 }
drhef4ac8f2004-06-19 00:16:31 +000090 do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
danielk197728d47b52004-05-22 08:16:11 +000091 set utf16le $utf16le_tcl
92
93 # Step 2.
drhef4ac8f2004-06-19 00:16:31 +000094 set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
danielk197728d47b52004-05-22 08:16:11 +000095 set utf16be_tcl [encoding convertto unicode $str]
96 append utf16be_tcl "\x00\x00"
97 if { $::tcl_platform(byteOrder)=="littleEndian" } {
98 set utf16be_tcl [swap_byte_order $utf16be_tcl]
99 }
drhef4ac8f2004-06-19 00:16:31 +0000100 do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
danielk197728d47b52004-05-22 08:16:11 +0000101 set utf16be $utf16be_tcl
102
103 # Step 3.
104 if { $::tcl_platform(byteOrder)=="littleEndian" } {
105 set utf16 $utf16le
106 } else {
107 set utf16 $utf16be
108 }
drhef4ac8f2004-06-19 00:16:31 +0000109 set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
110 do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]
danielk197728d47b52004-05-22 08:16:11 +0000111
112 # Step 4 (little endian).
113 append utf16le_bom "\xFF\xFE" $utf16le
danielk19771ba1b552004-06-23 13:46:32 +0000114 set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
drhef4ac8f2004-06-19 00:16:31 +0000115 do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]
danielk197728d47b52004-05-22 08:16:11 +0000116
117 # Step 4 (big endian).
118 append utf16be_bom "\xFE\xFF" $utf16be
drhef4ac8f2004-06-19 00:16:31 +0000119 set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
120 do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]
danielk197728d47b52004-05-22 08:16:11 +0000121
122 # Step 5 (little endian to little endian).
drhef4ac8f2004-06-19 00:16:31 +0000123 set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
124 do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le
danielk197728d47b52004-05-22 08:16:11 +0000125
126 # Step 5 (big endian to big endian).
drhef4ac8f2004-06-19 00:16:31 +0000127 set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
128 do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be
danielk197728d47b52004-05-22 08:16:11 +0000129
130 # Step 5 (big endian to little endian).
drhef4ac8f2004-06-19 00:16:31 +0000131 set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
132 do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le
danielk197728d47b52004-05-22 08:16:11 +0000133
134 # Step 5 (little endian to big endian).
drhef4ac8f2004-06-19 00:16:31 +0000135 set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
136 do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
danielk197728d47b52004-05-22 08:16:11 +0000137}
138
danielk1977bfd6cce2004-06-18 04:24:54 +0000139translate_selftest
danielk197728d47b52004-05-22 08:16:11 +0000140
141test_conversion enc-1 "hello world"
142test_conversion enc-2 "sqlite"
143test_conversion enc-3 ""
danielk1977bfd6cce2004-06-18 04:24:54 +0000144test_conversion enc-X "\u0100"
danielk197728d47b52004-05-22 08:16:11 +0000145test_conversion enc-4 "\u1234"
146test_conversion enc-5 "\u4321abc"
147test_conversion enc-6 "\u4321\u1234"
148test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
149test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
150test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
danielk1977a9c16b02007-05-16 18:11:41 +0000151test_conversion enc-10 [string repeat "\uE000" 100]
152
danielk19777677c0c2007-05-23 16:23:09 +0000153proc test_collate {enc zLeft zRight} {
154 return [string compare $zLeft $zRight]
155}
156add_test_collate $::DB 0 0 1
157do_test enc-11.1 {
158 execsql {
159 CREATE TABLE ab(a COLLATE test_collate, b);
160 INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
161 INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
162 CREATE INDEX ab_i ON ab(a, b);
163 }
164} {}
165do_test enc-11.2 {
166 set cp200 "\u00C8"
167 execsql {
168 SELECT count(*) FROM ab WHERE a = $::cp200;
169 }
170} {2}
171
danielk197728d47b52004-05-22 08:16:11 +0000172finish_test