shess | db00b6a | 2007-08-20 17:38:42 +0000 | [diff] [blame] | 1 | # 2007 March 28 |
| 2 | # |
| 3 | # The author disclaims copyright to this source code. |
| 4 | # |
| 5 | #************************************************************************* |
| 6 | # This file implements regression tests for SQLite library. The focus |
| 7 | # of this script is testing isspace/isalnum/tolower problems with the |
| 8 | # FTS3 module. Unfortunately, this code isn't a really principled set |
drh | 85b623f | 2007-12-13 21:54:09 +0000 | [diff] [blame] | 9 | # of tests, because it is impossible to know where new uses of these |
shess | db00b6a | 2007-08-20 17:38:42 +0000 | [diff] [blame] | 10 | # functions might appear. |
| 11 | # |
drh | 85b623f | 2007-12-13 21:54:09 +0000 | [diff] [blame] | 12 | # $Id: fts3al.test,v 1.2 2007/12/13 21:54:11 drh Exp $ |
shess | db00b6a | 2007-08-20 17:38:42 +0000 | [diff] [blame] | 13 | # |
| 14 | |
# Work out where the test scripts live from the path of this script and
# load the shared test harness from that directory.
set testdir [file dirname $argv0]
source [file join $testdir tester.tcl]

# These tests need the FTS3 module.  If the fts3 capability is NOT
# available in this build, end the test run here and skip the rest of
# the file.
ifcapable {!fts3} {
  finish_test
  return
}
| 23 | |
# Check that startsWith() (which calls isspace, tolower and isalnum)
# copes with hi-bit characters.  parseSpec() also calls isalnum on
# this path.  The column name is the single character \x80; creating
# the table is expected to succeed and return nothing.
do_test fts3al-1.1 {
  execsql [format "CREATE VIRTUAL TABLE t1 USING fts3(content, %s)" \x80]
} {}
| 29 | |
# Also covers the isspace() call in getToken() and the isalnum() call
# in tokenListToIdList().  The CREATE statement is expected to fail, so
# its error is swallowed by catch and only the message left on the
# database handle is compared.
do_test fts3al-1.2 {
  catch {
    execsql [format "CREATE VIRTUAL TABLE t2 USING fts3(content, tokenize %s)" \x80]
  }
  sqlite3_errmsg $DB
} [format "unknown tokenizer: %s" \x80]
| 38 | |
# Also covers the final isalnum() call in startsWith().  Here the
# hi-bit character directly follows "tokenize" with no space in
# between, and the statement is expected to succeed and return nothing.
do_test fts3al-1.3 {
  execsql [format "CREATE VIRTUAL TABLE t3 USING fts3(content, tokenize%s)" \x80]
} {}
| 43 | |
# The snippet-generation code has calls to isspace() which are sort of
# hard to get to.  It finds convenient breakpoints by starting ~40
# chars before and after the matched term, and scanning ~10 chars
# around that position for isspace() characters.  The long word with
# embedded hi-bit chars causes one of these isspace() calls to be
# exercised.  The version with a couple extra spaces should cause the
# other isspace() call to be exercised.  [Both cases have been tested
# in the debugger, but I'm hoping to continue to catch it if simple
# constant changes change things slightly.]
#
# The trailing and leading hi-bit chars help with code which tests for
# isspace() to coalesce multiple spaces.
#
# UPDATE: The above is no longer true; there is no such code in fts3.
# But leave the test in just the same.
#

# A 37-character "word": six runs of "xxxxx" separated and surrounded
# by the hi-bit character \x80.
set word "\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80"

# phrase1 separates every token with a single space (234 characters).
# phrase2 is the "extra spaces" variant: three additional spaces on each
# side of "target", i.e. 6 more characters (240 in total).  The spacing
# is significant - the expected snippet lengths below differ by exactly
# those 6 characters - so do not normalise the whitespace here.
set phrase1 "$word $word $word target $word $word $word"
set phrase2 "$word $word $word    target    $word $word $word"

db eval {CREATE VIRTUAL TABLE t4 USING fts3(content)}
db eval "INSERT INTO t4 (content) VALUES ('$phrase1')"
db eval "INSERT INTO t4 (content) VALUES ('$phrase2')"

# Expected lengths are consistent with snippet() returning the whole
# row text plus 7 characters of highlight markup around the match
# (presumably the default <b>...</b>): 234+7=241 and 240+7=247.
do_test fts3al-1.4 {
  execsql {SELECT rowid, length(snippet(t4)) FROM t4 WHERE t4 MATCH 'target'}
} {1 241 2 247}

finish_test