blob: 179c16bcacd44d55e88a8d5fcf5acb6653841fcd [file] [log] [blame]
dan31745982010-01-07 10:54:28 +00001# 2010 January 07
2#
3# The author disclaims copyright to this source code. In place of
4# a legal notice, here is a blessing:
5#
6# May you do good and not evil.
7# May you find forgiveness for yourself and forgive others.
8# May you share freely, never taking more than you give.
9#
10#*************************************************************************
11#
# The tests in this file verify that the FTS3 auxiliary functions offsets(),
# snippet() and matchinfo() work. At time of writing, running this file
# provides full coverage of fts3_snippet.c.
15#
danb023b042010-01-06 17:19:21 +000016
17set testdir [file dirname $argv0]
18source $testdir/tester.tcl
19
dan31745982010-01-07 10:54:28 +000020# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
danb023b042010-01-06 17:19:21 +000021ifcapable !fts3 { finish_test ; return }
dan3996a062010-01-15 17:25:52 +000022source $testdir/fts3_common.tcl
23source $testdir/malloc_common.tcl
24
25set sqlite_fts3_enable_parentheses 1
26set DO_MALLOC_TEST 0
danb023b042010-01-06 17:19:21 +000027
# Return list $L rebuilt in Tcl's canonical list form, i.e. with whatever
# whitespace and line breaks separated the elements in the source text
# reduced to the standard list formatting. Two lists that hold the same
# elements in the same order can then be compared using [string compare].
#
proc normalize {L} {
  return [list {*}$L]
}
36
# Run the query "SELECT offsets(ft) FROM ft WHERE ft MATCH $expr" as test
# case $name, using [do_select_test].
#
# Each argument after $expr is one expected offsets() value. The arguments
# are passed through [normalize] first, so a caller may spread a long list
# of integers over several lines.
#
proc do_offsets_test {name expr args} {
  set expected {}
  foreach offsetList $args {
    lappend expected [normalize $offsetList]
  }
  do_select_test $name {
    SELECT offsets(ft) FROM ft WHERE ft MATCH $expr
  } $expected
}
46
47# Document text used by a few tests. Contains the English names of all
48# integers between 1 and 300.
49#
50set numbers [normalize {
51 one two three four five six seven eight nine ten eleven twelve thirteen
52 fourteen fifteen sixteen seventeen eighteen nineteen twenty twentyone
53 twentytwo twentythree twentyfour twentyfive twentysix twentyseven
54 twentyeight twentynine thirty thirtyone thirtytwo thirtythree thirtyfour
55 thirtyfive thirtysix thirtyseven thirtyeight thirtynine forty fortyone
56 fortytwo fortythree fortyfour fortyfive fortysix fortyseven fortyeight
57 fortynine fifty fiftyone fiftytwo fiftythree fiftyfour fiftyfive fiftysix
58 fiftyseven fiftyeight fiftynine sixty sixtyone sixtytwo sixtythree sixtyfour
59 sixtyfive sixtysix sixtyseven sixtyeight sixtynine seventy seventyone
60 seventytwo seventythree seventyfour seventyfive seventysix seventyseven
61 seventyeight seventynine eighty eightyone eightytwo eightythree eightyfour
62 eightyfive eightysix eightyseven eightyeight eightynine ninety ninetyone
63 ninetytwo ninetythree ninetyfour ninetyfive ninetysix ninetyseven
64 ninetyeight ninetynine onehundred onehundredone onehundredtwo
65 onehundredthree onehundredfour onehundredfive onehundredsix onehundredseven
66 onehundredeight onehundrednine onehundredten onehundredeleven
67 onehundredtwelve onehundredthirteen onehundredfourteen onehundredfifteen
68 onehundredsixteen onehundredseventeen onehundredeighteen onehundrednineteen
69 onehundredtwenty onehundredtwentyone onehundredtwentytwo
70 onehundredtwentythree onehundredtwentyfour onehundredtwentyfive
71 onehundredtwentysix onehundredtwentyseven onehundredtwentyeight
72 onehundredtwentynine onehundredthirty onehundredthirtyone
73 onehundredthirtytwo onehundredthirtythree onehundredthirtyfour
74 onehundredthirtyfive onehundredthirtysix onehundredthirtyseven
75 onehundredthirtyeight onehundredthirtynine onehundredforty
76 onehundredfortyone onehundredfortytwo onehundredfortythree
77 onehundredfortyfour onehundredfortyfive onehundredfortysix
78 onehundredfortyseven onehundredfortyeight onehundredfortynine
79 onehundredfifty onehundredfiftyone onehundredfiftytwo onehundredfiftythree
80 onehundredfiftyfour onehundredfiftyfive onehundredfiftysix
81 onehundredfiftyseven onehundredfiftyeight onehundredfiftynine
82 onehundredsixty onehundredsixtyone onehundredsixtytwo onehundredsixtythree
83 onehundredsixtyfour onehundredsixtyfive onehundredsixtysix
84 onehundredsixtyseven onehundredsixtyeight onehundredsixtynine
85 onehundredseventy onehundredseventyone onehundredseventytwo
86 onehundredseventythree onehundredseventyfour onehundredseventyfive
87 onehundredseventysix onehundredseventyseven onehundredseventyeight
88 onehundredseventynine onehundredeighty onehundredeightyone
89 onehundredeightytwo onehundredeightythree onehundredeightyfour
90 onehundredeightyfive onehundredeightysix onehundredeightyseven
91 onehundredeightyeight onehundredeightynine onehundredninety
92 onehundredninetyone onehundredninetytwo onehundredninetythree
93 onehundredninetyfour onehundredninetyfive onehundredninetysix
94 onehundredninetyseven onehundredninetyeight onehundredninetynine twohundred
95 twohundredone twohundredtwo twohundredthree twohundredfour twohundredfive
96 twohundredsix twohundredseven twohundredeight twohundrednine twohundredten
97 twohundredeleven twohundredtwelve twohundredthirteen twohundredfourteen
98 twohundredfifteen twohundredsixteen twohundredseventeen twohundredeighteen
99 twohundrednineteen twohundredtwenty twohundredtwentyone twohundredtwentytwo
100 twohundredtwentythree twohundredtwentyfour twohundredtwentyfive
101 twohundredtwentysix twohundredtwentyseven twohundredtwentyeight
102 twohundredtwentynine twohundredthirty twohundredthirtyone
103 twohundredthirtytwo twohundredthirtythree twohundredthirtyfour
104 twohundredthirtyfive twohundredthirtysix twohundredthirtyseven
105 twohundredthirtyeight twohundredthirtynine twohundredforty
106 twohundredfortyone twohundredfortytwo twohundredfortythree
107 twohundredfortyfour twohundredfortyfive twohundredfortysix
108 twohundredfortyseven twohundredfortyeight twohundredfortynine
109 twohundredfifty twohundredfiftyone twohundredfiftytwo twohundredfiftythree
110 twohundredfiftyfour twohundredfiftyfive twohundredfiftysix
111 twohundredfiftyseven twohundredfiftyeight twohundredfiftynine
112 twohundredsixty twohundredsixtyone twohundredsixtytwo twohundredsixtythree
113 twohundredsixtyfour twohundredsixtyfive twohundredsixtysix
114 twohundredsixtyseven twohundredsixtyeight twohundredsixtynine
115 twohundredseventy twohundredseventyone twohundredseventytwo
116 twohundredseventythree twohundredseventyfour twohundredseventyfive
117 twohundredseventysix twohundredseventyseven twohundredseventyeight
118 twohundredseventynine twohundredeighty twohundredeightyone
119 twohundredeightytwo twohundredeightythree twohundredeightyfour
120 twohundredeightyfive twohundredeightysix twohundredeightyseven
121 twohundredeightyeight twohundredeightynine twohundredninety
122 twohundredninetyone twohundredninetytwo twohundredninetythree
123 twohundredninetyfour twohundredninetyfive twohundredninetysix
124 twohundredninetyseven twohundredninetyeight twohundredninetynine
125 threehundred
126}]
danb023b042010-01-06 17:19:21 +0000127
dan3996a062010-01-15 17:25:52 +0000128foreach {DO_MALLOC_TEST enc} {
129 0 utf8
130 1 utf8
131 1 utf16
dan31745982010-01-07 10:54:28 +0000132} {
dan3996a062010-01-15 17:25:52 +0000133
# Begin each iteration with a freshly created test.db that uses the text
# encoding named by $enc.
db close
file delete -force test.db
sqlite3 db test.db
db eval "PRAGMA encoding = \"${enc}\""

# $T is the prefix shared by the name of every test case run during this
# iteration of the loop.
set T fts3snippet-$enc
142
##########################################################################
# Test the offsets() function on a one-row table whose only document is
# the token "xxx" repeated four times.
#
do_test $T.1.1 {
  execsql {
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES('xxx xxx xxx xxx');
  }
} {}

# Each table entry is: test number, MATCH expression, expected offsets()
# value (written four integers per group for readability).
foreach {tn query expected} {
  2 {xxx} {
    0 0 0 3    0 0 4 3    0 0 8 3    0 0 12 3
  }
  3 {"xxx xxx"} {
    0 0 0 3    0 0 4 3    0 1 4 3    0 0 8 3
    0 1 8 3    0 1 12 3
  }
  4 {"xxx xxx" xxx} {
    0 0 0 3    0 2 0 3    0 0 4 3    0 1 4 3
    0 2 4 3    0 0 8 3    0 1 8 3    0 2 8 3
    0 1 12 3   0 2 12 3
  }
  5 {xxx "xxx xxx"} {
    0 0 0 3    0 1 0 3    0 0 4 3    0 1 4 3
    0 2 4 3    0 0 8 3    0 1 8 3    0 2 8 3
    0 0 12 3   0 2 12 3
  }
} {
  do_offsets_test $T.1.$tn $query $expected
}
167
# Two-column table with two rows. Column a of both rows holds the first
# 100 entries of $numbers; column b holds all of $numbers in the first row
# and NULL in the second.
#
# Test cases in this group are numbered 1..4 so that every test name is
# unique (the name $T.9.1 was previously used for two different tests).
do_test $T.9.1 {
  set v1 [lrange $numbers 0 99]
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(a, b);
    INSERT INTO ft VALUES($v1, $numbers);
    INSERT INTO ft VALUES($v1, NULL);
  }
} {}

# $off is the position of the complete word within $numbers; the trailing
# space in the search string restricts the search to the whole word.
set off [string first "twohundred " $numbers]
do_offsets_test $T.9.2 {twohundred} [list 1 0 $off 10]

set off [string first "onehundred " $numbers]
do_offsets_test $T.9.3 {onehundred} \
    [list 0 0 $off 10 1 0 $off 10] [list 0 0 $off 10]

# Test a corruption case: replace the text stored for column b directly in
# the ft_content table (not through ft), then check that offsets() reports
# an error instead of returning a result.
execsql { UPDATE ft_content SET c1b = 'hello world' WHERE c1b = $numbers }
do_error_test $T.9.4 {
  SELECT offsets(ft) FROM ft WHERE ft MATCH 'onehundred'
} {database disk image is malformed}
190
191 ##########################################################################
192 # Test the snippet function.
193 #
# Run snippet() over every row of ft that matches $expr, as test case
# $name, using [do_select_test].
#
# snippet() is always called with '{', '}' and '...' as its three marker
# strings; $iCol and $nTok are passed through unchanged as its last two
# arguments. Each argument after $nTok is one expected snippet; leading
# and trailing whitespace is trimmed from it before the comparison.
#
proc do_snippet_test {name expr iCol nTok args} {
  set expected {}
  foreach snippet $args {
    lappend expected [string trim $snippet]
  }
  do_select_test $name {
    SELECT snippet(ft,'{','}','...',$iCol,$nTok) FROM ft WHERE ft MATCH $expr
  } $expected
}
# One-row table holding the ten-word document "one ... ten".
do_test $T.2.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES('one two three four five six seven eight nine ten');
  }
} {}

# Query each of the ten words in turn with iCol=0 and nTok=5.
# Each table entry is: test number, search term, expected snippet.
foreach {tn term snippet} {
  2  one    "{one} two three four five..."
  3  two    "one {two} three four five..."
  4  three  "one two {three} four five..."
  5  four   "...two three {four} five six..."
  6  five   "...three four {five} six seven..."
  7  six    "...four five {six} seven eight..."
  8  seven  "...five six {seven} eight nine..."
  9  eight  "...six seven {eight} nine ten"
  10 nine   "...six seven eight {nine} ten"
  11 ten    "...six seven eight nine {ten}"
} {
  do_snippet_test $T.2.$tn $term 0 5 $snippet
}
218
# Add a second row to ft: the words "one" through "twenty", twice over.
#
# Test cases in this group are numbered 1..13 so that every test name is
# unique (the name $T.3.3 was previously used for two different tests).
do_test $T.3.1 {
  execsql {
    INSERT INTO ft VALUES(
         'one two three four five '
      || 'six seven eight nine ten '
      || 'eleven twelve thirteen fourteen fifteen '
      || 'sixteen seventeen eighteen nineteen twenty '
      || 'one two three four five '
      || 'six seven eight nine ten '
      || 'eleven twelve thirteen fourteen fifteen '
      || 'sixteen seventeen eighteen nineteen twenty'
    );
  }
} {}

do_snippet_test $T.3.2 {one nine} 0 5 {
  {one} two three...eight {nine} ten
} {
  {one} two three...eight {nine} ten...
}

do_snippet_test $T.3.3 {one nine} 0 -5 {
  {one} two three four five...six seven eight {nine} ten
} {
  {one} two three four five...seven eight {nine} ten eleven...
}
do_snippet_test $T.3.4 {one nineteen} 0 -5 {
  ...eighteen {nineteen} twenty {one} two...
}
do_snippet_test $T.3.5 {two nineteen} 0 -5 {
  ...eighteen {nineteen} twenty one {two}...
}
do_snippet_test $T.3.6 {three nineteen} 0 -5 {
  ...{nineteen} twenty one two {three}...
}

do_snippet_test $T.3.7 {four nineteen} 0 -5 {
  ...two three {four} five six...seventeen eighteen {nineteen} twenty one...
}
do_snippet_test $T.3.8 {four NEAR nineteen} 0 -5 {
  ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
}

do_snippet_test $T.3.9 {four nineteen} 0 5 {
  ...three {four} five...eighteen {nineteen} twenty...
}
do_snippet_test $T.3.10 {four NEAR nineteen} 0 5 {
  ...eighteen {nineteen} twenty...three {four} five...
}
do_snippet_test $T.3.11 {four NEAR nineteen} 0 -5 {
  ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
}
do_snippet_test $T.3.12 {four NOT (nineteen twentyone)} 0 5 {
  ...two three {four} five six...
} {
  ...two three {four} five six...
}
do_snippet_test $T.3.13 {four OR nineteen NEAR twentyone} 0 5 {
  ...two three {four} five six...
} {
  ...two three {four} five six...
}
281
# Three-column table with a single row; the word "five" occurs in columns
# a and b but not in column c.
do_test $T.5.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(a, b, c);
    INSERT INTO ft VALUES(
      'one two three four five',
      'four five six seven eight',
      'seven eight nine ten eleven'
    );
  }
} {}

# Search for "five" with nTok=3 while varying the iCol argument.
# Each table entry is: test number, iCol, expected snippet.
foreach {tn col snippet} {
  2 -1 {...three four {five}}
  3  0 {...three four {five}}
  4  1 {four {five} six...}
  5  2 {seven eight nine...}
} {
  do_snippet_test $T.5.$tn five $col 3 $snippet
}

# Set column b to NULL and repeat the same four queries.
do_test $T.5.6 {
  execsql { UPDATE ft SET b = NULL }
} {}

foreach {tn col snippet} {
  7  -1 {...three four {five}}
  8   0 {...three four {five}}
  9   1 {}
  10  2 {seven eight nine...}
} {
  do_snippet_test $T.5.$tn five $col 3 $snippet
}

do_snippet_test $T.5.11 {one "seven eight nine"} -1 -3 {
  {one} two three...{seven} {eight} {nine}...
}
311
# One-row table whose document is the complete $numbers text.
do_test $T.6.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(x);
    INSERT INTO ft VALUES($numbers);
  }
} {}

# The same six-term query is run with nTok=4 and with nTok=-4.
# Each table entry is: test number, nTok, expected snippet.
foreach {tn width snippet} {
  2  4 {{one}...{fifty}...{onehundred}...{onehundredfifty}...}
  3 -4 {{one} two three four...fortyeight fortynine {fifty} fiftyone...ninetyeight ninetynine {onehundred} onehundredone...onehundredfortyeight onehundredfortynine {onehundredfifty} onehundredfiftyone...}
} {
  do_snippet_test $T.6.$tn {
    one fifty onehundred onehundredfifty twohundredfifty threehundred
  } -1 $width $snippet
}
329
# Insert 149 rows of the form "one<commas>two", where row k has k commas
# between the two words, all inside one transaction. The snippet expected
# for each row is accumulated in $expectedSnippets in insertion order.
do_test $T.7.1 {
  execsql {
    BEGIN;
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(x);
  }
  set expectedSnippets {}
  set nComma 1
  while {$nComma < 150} {
    set commas [string repeat , $nComma]
    execsql {INSERT INTO ft VALUES('one' || $commas || 'two')}
    lappend expectedSnippets "{one}${commas}{two}"
    incr nComma
  }
  execsql COMMIT
} {}

# One expected snippet per inserted row.
do_snippet_test $T.7.2 {one two} -1 3 {*}$expectedSnippets
345
346 ##########################################################################
347 # Test the matchinfo function.
348 #
# Decode $blob as a sequence of 32-bit integers stored in the byte order
# of the host machine, and return the integers as a Tcl list.
#
proc mit {blob} {
  if {$::tcl_platform(byteOrder) eq "littleEndian"} {
    set fmt i*
  } else {
    set fmt I*
  }
  binary scan $blob $fmt ints
  return $ints
}
355 db func mit mit
# Run "SELECT mit(matchinfo(ft)) FROM ft WHERE ft MATCH $expr" as test case
# $name, using [do_select_test].
#
# Each argument after $expr is one expected list of integers; the arguments
# are passed through [normalize] first, so their internal spacing does not
# matter.
#
proc do_matchinfo_test {name expr args} {
  set expected {}
  foreach intList $args {
    lappend expected [normalize $intList]
  }
  do_select_test $name {
    SELECT mit(matchinfo(ft)) FROM ft WHERE ft MATCH $expr
  } $expected
}
# Two rows: the ten words "one ... ten" once, and the same ten words twice.
do_test $T.4.1 {
  set ten "one two three four five six seven eight nine ten"
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3;
    INSERT INTO ft VALUES($ten);
    INSERT INTO ft VALUES($ten || ' ' || $ten);
  }
} {}

# Each table entry is: test number, MATCH expression, list of the expected
# matchinfo() integer lists.
foreach {tn query rows} {
  2 {one}                         {{1 1 1 3 2} {1 1 2 3 2}}
  3 {one NEAR/3 ten}              {{2 1 1 1 1 1 1 1}}
  4 {five NEAR/4 ten}             {{2 1 1 3 2 1 3 2} {2 1 2 3 2 2 3 2}}
  5 {six NEAR/3 ten NEAR/3 two}   {{3 1 1 1 1 1 1 1 1 1 1}}
  6 {five NEAR/4 ten NEAR/3 two}  {{3 1 2 2 1 1 1 1 1 1 1}}
} {
  do_matchinfo_test $T.4.$tn $query {*}$rows
}
381
# Two-column table with three rows whose docids are 1000000, 2000000 and
# 3000000. For n = 1, 2, 3, column x holds the leading elements of $numbers
# up to index n*100, and column y holds n space-separated copies of the
# whole $numbers text.
#
# The [expr] arguments are braced so that the expression is substituted
# only once, by [expr] itself.
do_test $T.8.1 {
  execsql {
    DROP TABLE IF EXISTS ft;
    CREATE VIRTUAL TABLE ft USING fts3(x, y);
  }
  foreach n {1 2 3} {
    set v1 [lrange $numbers 0 [expr {$n*100}]]
    set v2 [string trim [string repeat "$numbers " $n]]
    set docid [expr {$n * 1000000}]
    execsql { INSERT INTO ft(docid, x, y) VALUES($docid, $v1, $v2) }
  }
} {}
do_matchinfo_test $T.8.2 {two*} \
    { 1 2 1 105 3 101 606 3} \
    { 1 2 3 105 3 202 606 3} \
    { 1 2 101 105 3 303 606 3}

do_matchinfo_test $T.8.4 {"one* two*"} \
    { 1 2 1 5 3 2 12 3} \
    { 1 2 2 5 3 4 12 3} \
    { 1 2 2 5 3 6 12 3}

do_matchinfo_test $T.8.5 {twohundredfifty} \
    { 1 2 0 1 1 1 6 3} \
    { 1 2 0 1 1 2 6 3} \
    { 1 2 1 1 1 3 6 3}

do_matchinfo_test $T.8.6 {"threehundred one"} \
    { 1 2 0 0 0 1 3 2} \
    { 1 2 0 0 0 2 3 2}

do_matchinfo_test $T.8.7 {one OR fivehundred} \
    { 2 2 1 3 3 1 6 3 0 0 0 0 0 0 } \
    { 2 2 1 3 3 2 6 3 0 0 0 0 0 0 } \
    { 2 2 1 3 3 3 6 3 0 0 0 0 0 0 }

do_matchinfo_test $T.8.8 {two OR "threehundred one"} \
    { 2 2 1 3 3 1 6 3 0 0 0 0 3 2 } \
    { 2 2 1 3 3 2 6 3 0 0 0 1 3 2 } \
    { 2 2 1 3 3 3 6 3 0 0 0 2 3 2 }

# Call matchinfo() twice in the same query: each row's list of integers is
# expected to appear twice in the result.
do_select_test $T.8.9 {
  SELECT mit(matchinfo(ft)), mit(matchinfo(ft))
  FROM ft WHERE ft MATCH 'two OR "threehundred one"'
} [normalize {
  {2 2 1 3 3 1 6 3 0 0 0 0 3 2}
  {2 2 1 3 3 1 6 3 0 0 0 0 3 2}
  {2 2 1 3 3 2 6 3 0 0 0 1 3 2}
  {2 2 1 3 3 2 6 3 0 0 0 1 3 2}
  {2 2 1 3 3 3 6 3 0 0 0 2 3 2}
  {2 2 1 3 3 3 6 3 0 0 0 2 3 2}
}]
dan31745982010-01-07 10:54:28 +0000434}
435
dan3996a062010-01-15 17:25:52 +0000436set sqlite_fts3_enable_parentheses 0
danb023b042010-01-06 17:19:21 +0000437finish_test