danielk1977 | 28d47b5 | 2004-05-22 08:16:11 +0000 | [diff] [blame] | 1 | # 2002 May 24 |
| 2 | # |
| 3 | # The author disclaims copyright to this source code. In place of |
| 4 | # a legal notice, here is a blessing: |
| 5 | # |
| 6 | # May you do good and not evil. |
| 7 | # May you find forgiveness for yourself and forgive others. |
| 8 | # May you share freely, never taking more than you give. |
| 9 | # |
| 10 | #*********************************************************************** |
| 11 | # This file implements regression tests for SQLite library. The focus of |
| 12 | # this file is testing the SQLite routines used for converting between the |
| 13 | # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and |
| 14 | # UTF-16be). |
| 15 | # |
danielk1977 | 7677c0c | 2007-05-23 16:23:09 +0000 | [diff] [blame] | 16 | # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $ |
danielk1977 | 28d47b5 | 2004-05-22 08:16:11 +0000 | [diff] [blame] | 17 | |
# Load the shared test harness, which lives in the same directory as the
# script named by $argv0.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Nothing in this file is meaningful unless the build has the "utf16"
# capability, so finish immediately when it is absent.
ifcapable !utf16 {
  finish_test
  return
}
| 27 | |
# Compare two binary strings byte-for-byte through the do_test harness.
#
#   testname - name under which the comparison is reported
#   got      - binary string actually produced
#   expect   - binary string that was expected
#
# Both strings are expanded into lists of signed 8-bit integers so that
# the comparison (and any failure report) works on readable numbers
# rather than on raw bytes.
proc do_bincmp_test {testname got expect} {
  binary scan $got c* actualBytes
  binary scan $expect c* wantedBytes

  # The test script simply yields the list of bytes that was produced;
  # do_test checks it against the expected list.
  do_test $testname [list set dummy $actualBytes] $wantedBytes
}
| 33 | |
# Reverse the byte order of a UTF-16 encoded string.
#
#   utf16 - binary string holding UTF-16 data; it is expected to contain
#           an even number of bytes, since bytes are processed in pairs.
#
# Returns a binary string in which the two bytes of every 16-bit unit
# have been exchanged. An empty input yields an empty result.
proc swap_byte_order {utf16} {
  binary scan $utf16 c* bytes

  # Start from an empty list so that an empty input does not leave the
  # result variable undefined when the loop body never runs.
  set swapped {}
  foreach {first second} $bytes {
    lappend swapped $second $first
  }

  return [binary format c* $swapped]
}
| 46 | |
#
# Check that the encoding conversions exposed through test_translate
# agree with the conversions performed by TCL itself.
#
#   testname - prefix for the names of the individual test cases
#   str      - sample text (an ordinary TCL string)
#
# Test cases generated, in order:
#
#   $testname.1      UTF-8 -> UTF-16le matches the TCL-derived bytes.
#   $testname.2      UTF-8 -> UTF-16be matches the TCL-derived bytes.
#   $testname.3      Native byte order UTF-16 -> UTF-8 reproduces the
#                    original text.
#   $testname.4.le   UTF-16 with a little-endian byte-order mark
#   $testname.4.be   (resp. big-endian) -> UTF-8 reproduces the original
#                    text. For byte-order mark info, refer to section
#                    3.10 of the unicode standard.
#   $testname.5.*    The byte-order marked strings from step 4 convert to
#                    UTF-16le and UTF-16be strings without the mark.
#
# Coverage recorded by the original author (routine names as originally
# written; confirm against the current test_translate implementation):
#
#    sqlite_utf8to16be (step 2)
#    sqlite_utf8to16le (step 1)
#    sqlite_utf16to8 (steps 3, 4)
#    sqlite_utf16to16le (step 5)
#    sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
  set hostIsLittleEndian \
      [expr {$::tcl_platform(byteOrder) eq "littleEndian"}]

  # Reference data. The result of "encoding convertto unicode" is treated
  # as UTF-16 in the byte order of the host, and a two-byte zero
  # terminator is added to it. The other byte order is obtained by
  # swapping every byte pair.
  set native [encoding convertto unicode $str]
  append native "\x00\x00"
  set swapped [swap_byte_order $native]
  if {$hostIsLittleEndian} {
    set utf16le $native
    set utf16be $swapped
  } else {
    set utf16le $swapped
    set utf16be $native
  }

  # Step 1: UTF-8 to UTF-16le.
  do_bincmp_test $testname.1 [test_translate $str UTF8 UTF16LE] $utf16le

  # Step 2: UTF-8 to UTF-16be.
  do_bincmp_test $testname.2 [test_translate $str UTF8 UTF16BE] $utf16be

  # Step 3: native byte order UTF-16 back to UTF-8.
  set roundTrip [test_translate $native UTF16 UTF8]
  do_bincmp_test $testname.3 $roundTrip [binarize $str]

  # Step 4 (little endian): prefix the byte-order mark FF FE.
  # NOTE(review): the trailing "1" is an extra test_translate argument
  # whose meaning is defined by the test fixture, not visible here.
  set utf16le_bom "\xFF\xFE"
  append utf16le_bom $utf16le
  set roundTrip [test_translate $utf16le_bom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $roundTrip [binarize $str]

  # Step 4 (big endian): prefix the byte-order mark FE FF.
  set utf16be_bom "\xFE\xFF"
  append utf16be_bom $utf16be
  set roundTrip [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $roundTrip [binarize $str]

  # Step 5: every combination of marked input and explicit output byte
  # order. Each row is: test suffix, input, source encoding, target
  # encoding, expected output (without byte-order mark).
  foreach {suffix marked fromEnc toEnc wanted} [list \
    le.le $utf16le_bom UTF16LE UTF16LE $utf16le \
    be.be $utf16be_bom UTF16   UTF16BE $utf16be \
    be.le $utf16be_bom UTF16   UTF16LE $utf16le \
    le.be $utf16le_bom UTF16   UTF16BE $utf16be \
  ] {
    set converted [test_translate $marked $fromEnc $toEnc]
    do_bincmp_test $testname.5.$suffix $converted $wanted
  }
}
| 138 | |
# Run the self-test provided by the test fixture before the conversion
# cases below.
translate_selftest

# Conversion cases as (test name prefix, sample text) pairs, executed in
# the order listed.
foreach {enc_case_name enc_case_text} [list \
  enc-1  "hello world" \
  enc-2  "sqlite" \
  enc-3  "" \
  enc-X  "\u0100" \
  enc-4  "\u1234" \
  enc-5  "\u4321abc" \
  enc-6  "\u4321\u1234" \
  enc-7  [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
  enc-8  [string repeat "\u007E\u007F\u0080\u0081" 100] \
  enc-9  [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
  enc-10 [string repeat "\uE000" 100] \
] {
  test_conversion $enc_case_name $enc_case_text
}
# Do not leave the loop variables behind in the enclosing scope.
unset enc_case_name enc_case_text
| 152 | |
# Comparison callback: orders two strings with TCL's own string
# comparison.
#
#   enc    - encoding label passed by the caller; not used
#   zLeft  - left-hand string
#   zRight - right-hand string
#
# Returns -1, 0 or 1 as zLeft sorts before, equal to, or after zRight.
proc test_collate {enc zLeft zRight} {
  string compare $zLeft $zRight
}
# Register the "test_collate" collation on the main database handle.
# NOTE(review): the three numeric arguments are presumably flags choosing
# the text encodings for which the collation is registered - confirm
# against the test fixture that implements add_test_collate.
add_test_collate $::DB 0 0 1

# enc-11.1: build an indexed table whose collated column holds two text
# values:
#   * X'C388', the two-byte UTF-8 encoding of U+00C8;
#   * a byte sequence that starts with 0xC0, continues with a long run
#     of 0x80 bytes and ends in 0x83 0x88.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}

# enc-11.2: both rows are expected to compare equal to U+00C8 under the
# test collation.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}
| 171 | |
danielk1977 | 28d47b5 | 2004-05-22 08:16:11 +0000 | [diff] [blame] | 172 | finish_test |