drh | e847d32 | 2011-01-20 02:56:37 +0000 | [diff] [blame] | 1 | # 2011 January 19 |
| 2 | # |
| 3 | # The author disclaims copyright to this source code. In place of |
| 4 | # a legal notice, here is a blessing: |
| 5 | # |
| 6 | # May you do good and not evil. |
| 7 | # May you find forgiveness for yourself and forgive others. |
| 8 | # May you share freely, never taking more than you give. |
| 9 | # |
| 10 | #*********************************************************************** |
| 11 | # |
| 12 | # This file implements tests for SQLite library. The focus of the tests |
dan | f52bb8d | 2013-08-03 20:24:58 +0000 | [diff] [blame] | 13 | # in this file is the use of the sqlite_stat4 histogram data on tables |
drh | e847d32 | 2011-01-20 02:56:37 +0000 | [diff] [blame] | 14 | # with many repeated values and only a few distinct values. |
| 15 | # |
| 16 | |
# Load the shared test harness that lives next to this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Every test below reads sqlite_stat3 or sqlite_stat4, so bail out early
# when the build provides neither.
ifcapable !stat4&&!stat3 {
  finish_test
  return
}

set testprefix analyze5
| 26 | |
# Run EXPLAIN QUERY PLAN on $sql and return whatever execsql returns.
#
#   sql - the SQL statement to explain
#   db  - database handle command passed through to execsql (default "db")
#
# The call is made with uplevel so that execsql runs in the caller's frame.
proc eqp {sql {db db}} {
  set query "EXPLAIN QUERY PLAN $sql"
  uplevel execsql [list $query] $db
}
| 30 | |
# Return the characters of $blob for which [string is alpha] is true,
# in their original order.  All other characters are dropped.
proc alpha {blob} {
  set letters ""
  set n [string length $blob]
  for {set k 0} {$k < $n} {incr k} {
    set ch [string index $blob $k]
    if {![string is alpha $ch]} continue
    append letters $ch
  }
  return $letters
}
# Make two Tcl commands callable from SQL.  lindex() is used by the
# sqlite_stat4 queries below to pick the first field out of
# test_decode(sample); alpha() wraps the proc of the same name.
db func lindex lindex
db func alpha alpha

# The test bodies below assign these names at global scope, so start
# from a clean slate.
unset -nocomplain i t u v w x y z
# Build the 1000-row table t1, index every column, run ANALYZE, and check
# that the histogram samples for index t1u are the four expected words.
do_test analyze5-1.0 {
  db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)}
  for {set i 0} {$i < 1000} {incr i} {
    # y is 1 only for rows 25..50; z steps 0,1,2,3 at rows 400, 700, 875.
    set y [expr {$i>=25 && $i<=50}]
    set z [expr {($i>=400) + ($i>=700) + ($i>=875)}]
    set t [expr {$z+0.5}]
    set w $z
    set x $z
    set u [lindex {alpha bravo charlie delta} $z]
    # Leaving a variable unset makes the INSERT below store NULL for that
    # column: x is NULL while z is 0, w is NULL while z is 3.
    if {$z==0} {unset x}
    if {$z==3} {unset w}
    # v carries the same word as u, upper-cased on even rows.
    if {$i%2} {
      set v $u
    } else {
      set v [string toupper $u]
    }
    db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)}
  }
  db eval {
    CREATE INDEX t1t ON t1(t);  -- 0.5, 1.5, 2.5, and 3.5
    CREATE INDEX t1u ON t1(u);  -- text
    CREATE INDEX t1v ON t1(v);  -- mixed case text
    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
    ANALYZE;
  }
  # The result of whichever branch runs is the result of the test body.
  ifcapable stat4 {
    db eval {
      SELECT DISTINCT lindex(test_decode(sample),0)
        FROM sqlite_stat4 WHERE idx='t1u' ORDER BY nlt;
    }
  } else {
    db eval {
      SELECT sample FROM sqlite_stat3 WHERE idx='t1u' ORDER BY nlt;
    }
  }
} {alpha bravo charlie delta}
drh | fc44913 | 2011-01-24 17:46:35 +0000 | [diff] [blame] | 81 | |
# The samples for the mixed-case index t1v, folded to lower case, must be
# the same four words.
do_test analyze5-1.1 {
  ifcapable stat4 {
    db eval {
      SELECT DISTINCT lower(lindex(test_decode(sample), 0))
        FROM sqlite_stat4 WHERE idx='t1v' ORDER BY 1
    }
  } else {
    db eval {
      SELECT lower(sample) FROM sqlite_stat3 WHERE idx='t1v' ORDER BY 1
    }
  }
} {alpha bravo charlie delta}
# Check the number of histogram samples stored for each index.  The
# expected counts differ between sqlite_stat4 and sqlite_stat3.
ifcapable stat4 {
  do_test analyze5-1.2 {
    db eval {SELECT idx, count(*) FROM sqlite_stat4 GROUP BY 1 ORDER BY 1}
  } {t1t 8 t1u 8 t1v 8 t1w 8 t1x 8 t1y 9 t1z 8}
} else {
  do_test analyze5-1.2 {
    db eval {SELECT idx, count(*) FROM sqlite_stat3 GROUP BY 1 ORDER BY 1}
  } {t1t 4 t1u 4 t1v 4 t1w 4 t1x 4 t1y 2 t1z 4}
}
drh | e847d32 | 2011-01-20 02:56:37 +0000 | [diff] [blame] | 103 | |
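# For each WHERE clause in the table below, verify that the query returns
# the same rows whether or not an index is used.  The "index" and "rows"
# columns record the index and row-count estimate that the planner was
# expected to report; the test that checked them is currently disabled.
#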
# Columns: test id, WHERE clause, expected index, expected row estimate.
# Only the first two are used by the active test in the loop body.
foreach {testid where index rows} {
    1  {z>=0 AND z<=0}       t1z  400
    2  {z>=1 AND z<=1}       t1z  300
    3  {z>=2 AND z<=2}       t1z  175
    4  {z>=3 AND z<=3}       t1z  125
    5  {z>=4 AND z<=4}       t1z    1
    6  {z>=-1 AND z<=-1}     t1z    1
    7  {z>1 AND z<3}         t1z  175
    8  {z>0 AND z<100}       t1z  600
    9  {z>=1 AND z<100}      t1z  600
   10  {z>1 AND z<100}       t1z  300
   11  {z>=2 AND z<100}      t1z  300
   12  {z>2 AND z<100}       t1z  125
   13  {z>=3 AND z<100}      t1z  125
   14  {z>3 AND z<100}       t1z    1
   15  {z>=4 AND z<100}      t1z    1
   16  {z>=-100 AND z<=-1}   t1z    1
   17  {z>=-100 AND z<=0}    t1z  400
   18  {z>=-100 AND z<0}     t1z    1
   19  {z>=-100 AND z<=1}    t1z  700
   20  {z>=-100 AND z<2}     t1z  700
   21  {z>=-100 AND z<=2}    t1z  875
   22  {z>=-100 AND z<3}     t1z  875

   31  {z>=0.0 AND z<=0.0}   t1z  400
   32  {z>=1.0 AND z<=1.0}   t1z  300
   33  {z>=2.0 AND z<=2.0}   t1z  175
   34  {z>=3.0 AND z<=3.0}   t1z  125
   35  {z>=4.0 AND z<=4.0}   t1z    1
   36  {z>=-1.0 AND z<=-1.0} t1z    1
   37  {z>1.5 AND z<3.0}     t1z  174
   38  {z>0.5 AND z<100}     t1z  599
   39  {z>=1.0 AND z<100}    t1z  600
   40  {z>1.5 AND z<100}     t1z  299
   41  {z>=2.0 AND z<100}    t1z  300
   42  {z>2.1 AND z<100}     t1z  124
   43  {z>=3.0 AND z<100}    t1z  125
   44  {z>3.2 AND z<100}     t1z    1
   45  {z>=4.0 AND z<100}    t1z    1
   46  {z>=-100 AND z<=-1.0} t1z    1
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z    1
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  875
   52  {z>=-100 AND z<3.0}   t1z  875

  101  {z=-1}                t1z    1
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  175
  105  {z=3}                 t1z  125
  106  {z=4}                 t1z    1
  107  {z=-10.0}             t1z    1
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  175
  111  {z=3.0}               t1z  125
  112  {z=4.0}               t1z    1
  113  {z=1.5}               t1z    1
  114  {z=2.5}               t1z    1

  201  {z IN (-1)}           t1z    1
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  175
  205  {z IN (3)}            t1z  125
  206  {z IN (4)}            t1z    1
  207  {z IN (0.5)}          t1z    1
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  875
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  475
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  126
  216  {z=-1 OR z=3}         t1z  126

  300  {y=0}                 t1y  974
  301  {y=1}                 t1y   26
  302  {y=0.1}               t1y    1

  400  {x IS NULL}           t1x  400

} {
  # Disabled: this checked that EXPLAIN QUERY PLAN reported the expected
  # index and row estimate.  It is no longer valid because the EXPLAIN
  # QUERY PLAN output format changed.
  # do_test analyze5-1.${testid}a {
  #   set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
  #   set idx {}
  #   regexp {INDEX (t1.) } $x all idx
  #   regexp {~([0-9]+) rows} $x all nrow
  #   list $idx $nrow
  # } [list $index $rows]

  # The rowids returned must be the same with and without an index.
  do_test analyze5-1.${testid}b {
    # Reference query: NOT INDEXED, with every y and z in the clause
    # rewritten to +y and +z.
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2 ORDER BY +rowid"]
    # Query under test: the planner is free to pick an index.
    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
    # On a mismatch, report both rowid lists so the failure is readable.
    set res ok
    if {$a1!=$a2} {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}
drh | e847d32 | 2011-01-20 02:56:37 +0000 | [diff] [blame] | 217 | |
drh | 1f9c766 | 2011-03-17 01:34:26 +0000 | [diff] [blame] | 218 | # Increase the number of NULLs in column x |
| 219 | # |
# Wipe column x, give five randomly chosen rows a non-NULL value (their
# own rowid), then refresh the statistics.
db eval {UPDATE t1 SET x=NULL}
db eval {
  UPDATE t1 SET x=rowid
   WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5)
}
db eval {ANALYZE}
| 226 | |
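# With x now almost entirely NULL, verify that each WHERE clause below
# returns the same rows whether or not an index is used.  The "index" and
# "rows" columns record the formerly expected plan; the test that checked
# them is currently disabled.
#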
# Columns: test id, WHERE clause, expected index, expected row estimate.
# Only the first two are used by the active test in the loop body.
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}  t1u   17
  501  {x=1 AND u='charlie'}        t1x    1
  502  {x IS NULL}                  t1x  995
  503  {x=1}                        t1x    1
  504  {x IS NOT NULL}              t1x    2
  505  {+x IS NOT NULL}             {}   500
  506  {upper(x) IS NOT NULL}       {}   500

} {
  # Disabled: this checked that EXPLAIN QUERY PLAN reported the expected
  # index and row estimate.  It is no longer valid because the EXPLAIN
  # QUERY PLAN output format changed.
  # do_test analyze5-1.${testid}a {
  #   set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
  #   set idx {}
  #   regexp {INDEX (t1.) } $x all idx
  #   regexp {~([0-9]+) rows} $x all nrow
  #   list $idx $nrow
  # } [list $index $rows]

  # The rowids returned must be the same with and without an index.
  do_test analyze5-1.${testid}b {
    # Reference query: NOT INDEXED.  None of the clauses above contain a
    # y or z, so the string map leaves them unchanged here.
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2 ORDER BY +rowid"]
    # Query under test: the planner is free to pick an index.
    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
    # On a mismatch, report both rowid lists so the failure is readable.
    set res ok
    if {$a1!=$a2} {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}

finish_test