dan | e2e5145 | 2009-12-03 17:36:22 +0000 | [diff] [blame] | 1 | # 2009 December 03 |
| 2 | # |
| 3 | # May you do good and not evil. |
| 4 | # May you find forgiveness for yourself and forgive others. |
| 5 | # May you share freely, never taking more than you give. |
| 6 | # |
| 7 | #*********************************************************************** |
| 8 | # |
| 9 | # Brute force (random data) tests for FTS3. |
| 10 | # |
| 11 | |
dan | 1548f21 | 2009-12-11 07:07:36 +0000 | [diff] [blame] | 12 | #------------------------------------------------------------------------- |
| 13 | # |
| 14 | # The FTS3 tests implemented in this file focus on testing that FTS3 |
| 15 | # returns the correct set of documents for various types of full-text |
| 16 | # query. This is done using pseudo-randomly generated data and queries. |
| 17 | # The expected result of each query is calculated using Tcl code. |
| 18 | # |
| 19 | # 1. The database is initialized to contain a single table with three |
| 20 | # columns. 100 rows are inserted into the table. Each of the three |
| 21 | # values in each row is a document consisting of between 0 and 100 |
| 22 | # terms. Terms are selected from a vocabulary of $G(nVocab) terms. |
| 23 | # |
| 24 | # 2. The following is performed 100 times: |
| 25 | # |
| 26 | # a. A row is inserted into the database. The row contents are |
| 27 | # generated as in step 1. The docid is a pseudo-randomly selected |
| 28 | # value between 0 and 1000000. |
| 29 | # |
| 30 | # b. A pseudo-randomly selected row is updated. One of its columns is |
| 31 | # set to contain a new document generated in the same way as the |
| 32 | # documents in step 1. |
| 33 | # |
| 34 | # c. A pseudo-randomly selected row is deleted. |
| 35 | # |
| 36 | # d. For each of several types of fts3 queries, 10 SELECT queries |
| 37 | # of the form: |
| 38 | # |
| 39 | # SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>' |
| 40 | # |
| 41 | # are evaluated. The results are compared to those calculated by |
| 42 | # Tcl code in this file. The patterns used for the different query |
| 43 | # types are: |
| 44 | # |
| 45 | # 1. query = <term> |
| 46 | # 2. query = <prefix> |
| 47 | # 3. query = "<term> <term>" |
| 48 | # 4. query = "<term> <term> <term>" |
| 49 | # 5. query = "<prefix> <prefix> <prefix>" |
| 50 | # 6. query = <term> NEAR <term> |
| 51 | # 7. query = <term> NEAR/11 <term> NEAR/11 <term> |
| 52 | # 8. query = <term> OR <term> |
| 53 | # 9. query = <term> NOT <term> |
| 54 | # 10. query = <term> AND <term> |
| 55 | # 11. query = <term> NEAR <term> OR <term> NEAR <term> |
| 56 | # 12. query = <term> NEAR <term> NOT <term> NEAR <term> |
| 57 | # 13. query = <term> NEAR <term> AND <term> NEAR <term> |
| 58 | # |
| 59 | # where <term> is a term pseudo-randomly selected from the vocabulary |
| 60 | # and <prefix> is such a term with its final character removed, |
| 61 | # followed by a "*" character. |
| 62 | # |
| 63 | # Every second iteration, steps (a) through (d) above are performed |
| 64 | # within a single transaction. This forces the queries in (d) to |
| 65 | # read data from both the database and the in-memory hash table |
| 66 | # that caches the full-text index entries created by steps (a), (b) |
| 67 | # and (c) until the transaction is committed. |
| 68 | # |
| 69 | # The procedure above is run 5 times, using advisory fts3 node sizes of 50 |
| 70 | # (twice: once with order=DESC, once with order=ASC), 500, 1000 and 2000 bytes. |
| 71 | # |
| 72 | # After the test using an advisory node-size of 50, an OOM test is run using |
| 73 | # the database. This test is similar to step (d) above, except that it tests |
| 74 | # the effects of transient and persistent OOM conditions encountered while |
| 75 | # executing each query. |
| 76 | # |
| 77 | |
dan | e2e5145 | 2009-12-03 17:36:22 +0000 | [diff] [blame] | 78 | set testdir [file dirname $argv0] |
| 79 | source $testdir/tester.tcl |
| 80 | |
| 81 | # If this build does not include FTS3, skip the tests in this file. |
| 82 | # |
| 83 | ifcapable !fts3 { finish_test ; return } |
| 84 | source $testdir/fts3_common.tcl |
dan | ef37802 | 2010-05-04 11:06:03 +0000 | [diff] [blame] | 85 | source $testdir/malloc_common.tcl |
dan | e2e5145 | 2009-12-03 17:36:22 +0000 | [diff] [blame] | 86 | |
# Size of the vocabulary from which all document and query terms are drawn.
#
set G(nVocab) 100

# nVocab is the value read by [random_term]; keep it tied to G(nVocab) so
# that the two can never disagree.
#
set nVocab $G(nVocab)
set lVocab [list]

# Seed the PRNG with a fixed value so that the sequence of rand() results,
# and therefore the generated vocabulary, documents and queries, does not
# vary from one run of this script to the next.
#
expr {srand(0)}

# Generate a vocabulary of nVocab words. Each word consists of between
# 2 and 4 lowercase ASCII letters. Words are not checked for uniqueness,
# so the vocabulary may contain duplicates.
#
set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
for {set i 0} {$i < $nVocab} {incr i} {
  # The word length is drawn first, followed by one rand() call per letter.
  set len [expr {int(rand()*3)+2}]
  set word ""
  for {set j 0} {$j < $len} {incr j} {
    append word [lindex $lChar [expr {int(rand()*26)}]]
  }
  lappend lVocab $word
}
| 105 | |
# Return one term selected pseudo-randomly from the global vocabulary list
# ::lVocab. The index is drawn from the range 0 to ::nVocab-1.
#
proc random_term {} {
  set iTerm [expr {int(rand() * $::nVocab)}]
  return [lindex $::lVocab $iTerm]
}
| 109 | |
# Build and return a document: a Tcl list holding $nWord terms, each one
# obtained from a separate call to [random_term]. If $nWord is zero (or
# negative) the result is an empty list.
#
proc generate_doc {nWord} {
  set lTerm {}
  for {set iWord 0} {$iWord < $nWord} {incr iWord} {
    set zTerm [random_term]
    lappend lTerm $zTerm
  }
  set lTerm
}
| 120 | |
| 121 | |
| 122 | |
# Primitives to update the table.
#
# Each primitive applies its change both to the SQL table t1 and to the
# global Tcl array ::t1, which maps a docid to the list of that row's
# three column values (a, b, c).
#
unset -nocomplain t1

# Insert a new row with docid $rowid. Each of the three columns is given a
# freshly generated document of int(rand()*100) terms.
#
proc insert_row {rowid} {
  # Columns are generated in the order a, b, c.
  foreach col {a b c} {
    set $col [generate_doc [expr {int(rand()*100)}]]
  }
  execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }
  set ::t1($rowid) [list $a $b $c]
}
# Delete the row with rowid $rowid from table t1 and drop the matching
# entry, if there is one, from the ::t1 array.
#
proc delete_row {rowid} {
  execsql { DELETE FROM t1 WHERE rowid = $rowid }
  if {[info exists ::t1($rowid)]} {
    unset ::t1($rowid)
  }
}
# Replace one pseudo-randomly chosen column (a, b or c) of the row with
# rowid $rowid by a freshly generated document. The ::t1 array entry is
# modified first, then the SQL table.
#
proc update_row {rowid} {
  set iCol [expr {int(rand()*3)}]
  set zCol [lindex {a b c} $iCol]
  set doc [generate_doc [expr {int(rand()*100)}]]
  lset ::t1($rowid) $iCol $doc
  # $doc and $rowid are escaped so that they reach SQLite as variable
  # references rather than being substituted into the SQL text.
  execsql "UPDATE t1 SET $zCol = \$doc WHERE rowid = \$rowid"
}
| 144 | |
# Return the list of docids, in ascending order, of all rows in ::t1 that
# have at least one column matching the phrase $zPrefix.
#
# $zPrefix is a space-separated sequence of one or more tokens. A token
# may contain "*", which matches any run of non-space characters; this is
# how prefix tokens such as "ab*" are expressed. The tokens must appear
# consecutively, and in order, within a single column.
#
proc simple_phrase {zPrefix} {
  # Turn the phrase into a regular expression. Both the pattern and the
  # text searched are wrapped in spaces so that a match always begins and
  # ends on a token boundary.
  set reg " [string map {* {[^ ]*}} $zPrefix] "

  set ret [list]
  foreach key [lsort -integer [array names ::t1]] {
    foreach col $::t1($key) {
      # One matching column is enough; do not report a docid twice.
      if {[regexp $reg " $col "]} { lappend ret $key ; break }
    }
  }

  return $ret
}
dan | 45bcd6c | 2009-12-12 13:16:09 +0000 | [diff] [blame] | 162 | |
# This [proc] is used to test the FTS3 matchinfo() function.
#
# It computes, from the ::t1 array, the result expected from a query of
# the form:
#
#     SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $zToken
#
# for a single-token query. The return value is a flat list of alternating
# docid and matchinfo values, ordered by docid - ascending if $bDesc is
# false, descending if it is true. Only rows containing $zToken in at least
# one column are included. Each matchinfo value is a list of the form:
#
#     1 3  <hits-0> <nHit-0> <nDoc-0>  <hits-1> <nHit-1> <nDoc-1>  ...
#
# where, for each of the three columns i:
#
#     <hits-i>  number of occurrences of the token in column i of this row
#     <nHit-i>  number of occurrences in column i summed over all rows
#     <nDoc-i>  number of rows with at least one occurrence in column i
#
proc simple_token_matchinfo {zToken bDesc} {

  foreach i {0 1 2} {
    set nDoc($i) 0
    set nHit($i) 0
  }

  set dir -increasing
  if {$bDesc} { set dir -decreasing }

  # First pass: count the hits in every column of every row, accumulating
  # the per-column totals as we go. a($key) is the list of three per-column
  # hit counts for row $key.
  foreach key [array names ::t1] {
    set a($key) [list]
    foreach i {0 1 2} col $::t1($key) {
      # -exact is required here: the default lsearch mode is glob matching,
      # which would miscount any token containing "*", "?" or "[".
      set hit [llength [lsearch -exact -all $col $zToken]]
      lappend a($key) $hit
      incr nHit($i) $hit
      if {$hit>0} { incr nDoc($i) }
    }
  }

  # Second pass: emit an entry for each row with at least one hit.
  set ret [list]
  foreach docid [lsort -integer $dir [array names a]] {
    # The largest per-column count is non-zero iff the row matches.
    if { [lindex [lsort -integer $a($docid)] end] } {
      set matchinfo [list 1 3]
      foreach i {0 1 2} hit $a($docid) {
        lappend matchinfo $hit $nHit($i) $nDoc($i)
      }
      lappend ret $docid $matchinfo
    }
  }

  return $ret
}
| 201 | |
# Return the sorted list of docids of rows in ::t1 that have a column
# satisfying the NEAR chain described by $termlist and $nNear.
#
# Within one column, the search starts from every position of the first
# term. For each following term T, the candidate set is replaced by the
# positions of T that lie at most ($nNear + 1) positions away from some
# current candidate, not counting the candidate position itself. The
# column matches if any candidate remains once all terms are processed.
#
proc simple_near {termlist nNear} {
  set zFirst [lindex $termlist 0]
  set lRest  [lrange $termlist 1 end]
  set nSpan  [expr {$nNear + 1}]

  set lDocid [list]
  foreach docid [array names ::t1] {
    foreach doc $::t1($docid) {

      set lPos [lsearch -exact -all $doc $zFirst]
      foreach zTerm $lRest {
        set lCand [lsearch -exact -all $doc $zTerm]
        set lNext [list]
        foreach iPos $lPos {
          foreach iCand $lCand {
            if {$iCand == $iPos} continue
            if {abs($iCand - $iPos) <= $nSpan} { lappend lNext $iCand }
          }
        }
        set lPos [lsort -unique -integer $lNext]
      }

      # A docid may be appended once per matching column; duplicates are
      # removed by the final sort.
      if {[llength $lPos] > 0} { lappend lDocid $docid }
    }
  }

  lsort -unique -integer $lDocid
}
| 232 | |
dan | ff32e39 | 2009-12-07 12:34:51 +0000 | [diff] [blame] | 233 | # The following three procs: |
| 234 | # |
| 235 | # setop_not A B |
| 236 | # setop_or A B |
| 237 | # setop_and A B |
| 238 | # |
| 239 | # each take two arguments. Both arguments must be lists of integer values |
| 240 | # sorted by value. The return value is the list produced by evaluating |
| 241 | # the equivalent of "A op B", where op is the FTS3 operator NOT, OR or |
| 242 | # AND. |
| 243 | # |
# Return the elements of list A that do not appear in list B, in the
# order in which they occur in A.
proc setop_not {A B} {
  # Index the members of B so that each membership test is a single lookup.
  foreach docid $B { set inB($docid) 1 }

  set lOut {}
  foreach docid $A {
    if {[info exists inB($docid)]} continue
    lappend lOut $docid
  }
  set lOut
}
# Return the union of integer lists A and B: sorted in ascending order,
# with duplicate values removed.
proc setop_or {A B} {
  set lAll [concat $A $B]
  return [lsort -unique -integer $lAll]
}
# Return the elements of list A that also appear in list B, in the order
# in which they occur in A.
proc setop_and {A B} {
  # Index the members of B so that each membership test is a single lookup.
  foreach docid $B { set inB($docid) 1 }

  set lOut {}
  foreach docid $A {
    if {![info exists inB($docid)]} continue
    lappend lOut $docid
  }
  set lOut
}
| 259 | |
# Decode $blob as a sequence of 32-bit integers stored in the byte order
# of the host platform, and return those integers as a Tcl list.
#
proc mit {blob} {
  # "i*" scans little-endian 32-bit integers, "I*" big-endian ones.
  switch -exact -- $::tcl_platform(byteOrder) {
    littleEndian { set fmt i* }
    bigEndian    { set fmt I* }
  }
  binary scan $blob $fmt r
  return $r
}
# Register the Tcl proc [mit] as the SQL function mit(), so that the
# queries in this file can evaluate mit(matchinfo(t1)).
db func mit mit
# NOTE(review): presumably switches FTS3 to the enhanced query syntax that
# the AND / NOT queries later in this file rely on - confirm against the
# test harness that defines this variable.
set sqlite_fts3_enable_parentheses 1
| 268 | |
# Run the query $sql twice via [do_select_test], in the caller's context:
#
#   $tn.asc   with "ORDER BY docid ASC" appended, expecting $res as given
#   $tn.desc  with "ORDER BY docid DESC" appended, expecting $res sorted
#             into decreasing integer order
#
# $res is passed through unchanged for the ascending case, so the caller
# must supply it already sorted in ascending order.
#
proc do_orderbydocid_test {tn sql res} {
  foreach {suffix dir} {asc ASC desc DESC} {
    if {$dir eq "DESC"} {
      set expect [lsort -integer -decreasing $res]
    } else {
      set expect $res
    }
    uplevel [list do_select_test $tn.$suffix "$sql ORDER BY docid $dir" $expect]
  }
}
| 275 | |
# Number of insert/update/delete/query iterations run against each
# (nodesize, order) configuration.
#
set NUM_TRIALS 100

# Each pair below is one configuration: the advisory node size passed to
# the table via the 'nodesize=N' special INSERT, and the value of the fts4
# "order" option used when the table is created.
#
foreach {nodesize order} {
  50    DESC
  50    ASC
  500   ASC
  1000  DESC
  2000  ASC
} {
  catch { array unset ::t1 }
  set testname "$nodesize/$order"

  # Create the FTS4 table. Populate it (and the Tcl array) with 100 rows.
  #
  db transaction {
    catchsql { DROP TABLE t1 }
    execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)"
    execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"
    for {set i 0} {$i < 100} {incr i} { insert_row $i }
  }

  for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} {
    # Close any transaction left open by the previous iteration.
    catchsql COMMIT

    # On the final iteration of each run that uses node size 50, set
    # DO_MALLOC_TEST and reduce the number of repetitions of each query
    # type. The trigger is tied to NUM_TRIALS so that it still fires on
    # the last iteration if NUM_TRIALS is changed.
    #
    set DO_MALLOC_TEST 0
    set nRep 10
    if {$iTest==$NUM_TRIALS && $nodesize==50} {
      set DO_MALLOC_TEST 1
      set nRep 2
    }

    set ::testprefix fts3rnd-1.$testname.$iTest

    # Delete one row, update one row and insert one row.
    #
    # The row to update is picked first. The row to delete is re-drawn
    # until it differs from the row to update, and the docid to insert is
    # re-drawn until it is not already in use.
    #
    set rows [array names ::t1]
    set nRow [llength $rows]
    set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]
    set iDelete $iUpdate
    while {$iDelete == $iUpdate} {
      set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
    }
    set iInsert $iUpdate
    while {[info exists ::t1($iInsert)]} {
      set iInsert [expr {int(rand()*1000000)}]
    }

    # On even-numbered iterations the transaction is committed before the
    # queries run. On odd-numbered iterations it is left open, so that the
    # queries below run inside the transaction containing the three
    # modifications. It is then committed at the end of the iteration.
    #
    execsql BEGIN
    insert_row $iInsert
    update_row $iUpdate
    delete_row $iDelete
    if {0==($iTest%2)} { execsql COMMIT }

    # An integrity check on committed (even-numbered) iterations is
    # currently disabled:
    #
    #   do_test 0 { fts3_integrity_check t1 } ok

    # Pick 10 terms from the vocabulary. Check that the results of querying
    # the database for the set of documents containing each of these terms
    # is the same as the result obtained by scanning the contents of the Tcl
    # array for each term.
    #
    # NOTE(review): unlike the loops below, this one always runs 10 times
    # rather than $nRep times - confirm whether that is intentional.
    #
    for {set i 0} {$i < 10} {incr i} {
      set term [random_term]
      do_select_test 1.$i.asc {
        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
        ORDER BY docid ASC
      } [simple_token_matchinfo $term 0]
      do_select_test 1.$i.desc {
        SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
        ORDER BY docid DESC
      } [simple_token_matchinfo $term 1]
    }

    # This time, use each term with its final character removed as a term
    # prefix to query for. Test that querying the Tcl array produces the
    # same results as querying the FTS3 table for the prefix.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set prefix [string range [random_term] 0 end-1]
      set match "${prefix}*"
      do_orderbydocid_test 2.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $match]
    }

    # Similar to the above, except for phrase queries.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set term [list [random_term] [random_term]]
      set match "\"$term\""
      do_orderbydocid_test 3.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $term]
    }

    # Three word phrases.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set term [list [random_term] [random_term] [random_term]]
      set match "\"$term\""
      do_orderbydocid_test 4.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $term]
    }

    # Three word phrases made up of term-prefixes.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set query "[string range [random_term] 0 end-1]* "
      append query "[string range [random_term] 0 end-1]* "
      append query "[string range [random_term] 0 end-1]*"

      set match "\"$query\""
      do_orderbydocid_test 5.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_phrase $query]
    }

    # A NEAR query with terms as the arguments:
    #
    #     ... MATCH '$term1 NEAR $term2' ...
    #
    # The expected result is computed with a distance of 10, which is the
    # distance this test assumes for a NEAR operator with no explicit /N.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set terms [list [random_term] [random_term]]
      set match [join $terms " NEAR "]
      do_orderbydocid_test 6.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_near $terms 10]
    }

    # A 3-way NEAR query with terms as the arguments.
    #
    for {set i 0} {$i < $nRep} {incr i} {
      set terms [list [random_term] [random_term] [random_term]]
      set nNear 11
      set match [join $terms " NEAR/$nNear "]
      do_orderbydocid_test 7.$i {
        SELECT docid FROM t1 WHERE t1 MATCH $match
      } [simple_near $terms $nNear]
    }

    # Set operations on simple term queries.
    #
    foreach {tn op proc} {
      8   OR   setop_or
      9   NOT  setop_not
      10  AND  setop_and
    } {
      for {set i 0} {$i < $nRep} {incr i} {
        set term1 [random_term]
        set term2 [random_term]
        set match "$term1 $op $term2"
        do_orderbydocid_test $tn.$i {
          SELECT docid FROM t1 WHERE t1 MATCH $match
        } [$proc [simple_phrase $term1] [simple_phrase $term2]]
      }
    }

    # Set operations on NEAR queries.
    #
    foreach {tn op proc} {
      11  OR   setop_or
      12  NOT  setop_not
      13  AND  setop_and
    } {
      for {set i 0} {$i < $nRep} {incr i} {
        set term1 [random_term]
        set term2 [random_term]
        set term3 [random_term]
        set term4 [random_term]
        set match "$term1 NEAR $term2 $op $term3 NEAR $term4"
        do_orderbydocid_test $tn.$i {
          SELECT docid FROM t1 WHERE t1 MATCH $match
        } [$proc                                  \
            [simple_near [list $term1 $term2] 10] \
            [simple_near [list $term3 $term4] 10]
        ]
      }
    }

    # Commit the transaction left open by odd-numbered iterations. This
    # is a harmless no-op when no transaction is open, since catchsql
    # does not propagate the resulting error.
    catchsql COMMIT
  }
}

finish_test