# 2012 December 31
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for the REGEXP operator in test_regexp.c.
#

# Resolve the directory that holds the script named by $argv0 and load
# tester.tcl from that same directory.  (Presumably tester.tcl provides the
# do_test / do_execsql_test commands used below -- they are not defined here.)
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Load the regexp extension into the "db" connection, create the four-row
# fixture table t1 that the remaining regexp1-1.* tests query, and check a
# simple match anchored at the start of the string.
do_test regexp1-1.1 {
  load_static_extension db regexp
  db eval {
    CREATE TABLE t1(x INTEGER PRIMARY KEY, y TEXT);
    INSERT INTO t1 VALUES(1, 'For since by man came death,');
    INSERT INTO t1 VALUES(2, 'by man came also the resurrection of the dead.');
    INSERT INTO t1 VALUES(3, 'For as in Adam all die,');
    INSERT INTO t1 VALUES(4, 'even so in Christ shall all be made alive.');

    SELECT x FROM t1 WHERE y REGEXP '^For ' ORDER BY x;
  }
} {1 3}

# regexpi(PATTERN,STRING): the expected results below show that letter case
# is ignored in either argument.
do_execsql_test regexp1-1.1.2 {
  SELECT regexpi('abc','ABC');
} {1}
do_execsql_test regexp1-1.1.3 {
  SELECT regexpi('ABC','ABC');
} {1}
do_execsql_test regexp1-1.1.4 {
  SELECT regexpi('ABC','abc');
} {1}
# The pattern 'ABC.' needs one more character than the subject 'ABC' has,
# so there is no match.
do_execsql_test regexp1-1.1.5 {
  SELECT regexpi('ABC.','ABC');
} {0}

# Alternation with "|", exercised through the REGEXP operator and through
# the regexp(PATTERN,STRING) / regexpi(PATTERN,STRING) function forms.
# These tests query the table t1 populated by regexp1-1.1.
do_execsql_test regexp1-1.2 {
  SELECT x FROM t1 WHERE y REGEXP 'by|in' ORDER BY x;
} {1 2 3 4}
do_execsql_test regexp1-1.3.1 {
  SELECT x FROM t1 WHERE y REGEXP 'by|Christ' ORDER BY x;
} {1 2 4}
# regexp() is case-sensitive: lower-case 'christ' does not match row 4.
do_execsql_test regexp1-1.3.2 {
  SELECT x FROM t1 WHERE regexp('by|christ',y) ORDER BY x;
} {1 2}
# regexpi() ignores case, so row 4 matches again.
do_execsql_test regexp1-1.3.3 {
  SELECT x FROM t1 WHERE regexpi('by|christ',y) ORDER BY x;
} {1 2 4}
do_execsql_test regexp1-1.3.4 {
  SELECT x FROM t1 WHERE regexpi('BY|CHRIST',y) ORDER BY x;
} {1 2 4}

# "+" (one or more) and "*" (zero or more) quantifiers.
do_execsql_test regexp1-1.4 {
  SELECT x FROM t1 WHERE y REGEXP 'shal+ al+' ORDER BY x;
} {4}
do_execsql_test regexp1-1.5.1 {
  SELECT x FROM t1 WHERE y REGEXP 'shall x*y*z*all' ORDER BY x;
} {4}
do_execsql_test regexp1-1.5.2 {
  SELECT x FROM t1 WHERE regexp('shall x*y*z*all',y) ORDER BY x;
} {4}
# Upper-case 'SHALL' only matches when the case-insensitive form is used.
do_execsql_test regexp1-1.5.3 {
  SELECT x FROM t1 WHERE regexp('SHALL x*y*z*all',y) ORDER BY x;
} {}
do_execsql_test regexp1-1.5.4 {
  SELECT x FROM t1 WHERE regexpi('SHALL x*y*z*all',y) ORDER BY x;
} {4}
# "?" (optional) quantifier.  Queries the table t1 populated by regexp1-1.1.
do_execsql_test regexp1-1.6 {
  SELECT x FROM t1 WHERE y REGEXP 'shallx?y? ?z?all' ORDER BY x;
} {4}

# Counted repetition: {N} and {M,N}.
do_execsql_test regexp1-1.7 {
  SELECT x FROM t1 WHERE y REGEXP 'r{2}' ORDER BY x;
} {2}
do_execsql_test regexp1-1.8 {
  SELECT x FROM t1 WHERE y REGEXP 'r{3}' ORDER BY x;
} {}
do_execsql_test regexp1-1.9 {
  SELECT x FROM t1 WHERE y REGEXP 'r{1}' ORDER BY x;
} {1 2 3 4}
do_execsql_test regexp1-1.10 {
  SELECT x FROM t1 WHERE y REGEXP 'ur{2,10}e' ORDER BY x;
} {2}

# Character classes, negated classes and ranges.
do_execsql_test regexp1-1.11 {
  SELECT x FROM t1 WHERE y REGEXP '[Aa]dam' ORDER BY x;
} {3}
do_execsql_test regexp1-1.12 {
  SELECT x FROM t1 WHERE y REGEXP '[^Aa]dam' ORDER BY x;
} {}
do_execsql_test regexp1-1.13 {
  SELECT x FROM t1 WHERE y REGEXP '[^b-zB-Z]dam' ORDER BY x;
} {3}

# "^" and "$" anchors.  Row 4 ends with "alive." so 'alive$' must not match,
# while 'alive.$' and 'alive\.$' must.
do_execsql_test regexp1-1.14 {
  SELECT x FROM t1 WHERE y REGEXP 'alive' ORDER BY x;
} {4}
do_execsql_test regexp1-1.15 {
  SELECT x FROM t1 WHERE y REGEXP '^alive' ORDER BY x;
} {}
do_execsql_test regexp1-1.16 {
  SELECT x FROM t1 WHERE y REGEXP 'alive$' ORDER BY x;
} {}
do_execsql_test regexp1-1.17 {
  SELECT x FROM t1 WHERE y REGEXP 'alive.$' ORDER BY x;
} {4}
do_execsql_test regexp1-1.18 {
  SELECT x FROM t1 WHERE y REGEXP 'alive\.$' ORDER BY x;
} {4}
# Word-boundary (\b), word (\w, \W) and whitespace (\s, \S) escapes.
# Queries the table t1 populated by regexp1-1.1.  Rows 1 and 2 contain the
# word "man"; row 4 contains "made".
do_execsql_test regexp1-1.19 {
  SELECT x FROM t1 WHERE y REGEXP 'ma[nd]' ORDER BY x;
} {1 2 4}
do_execsql_test regexp1-1.20 {
  SELECT x FROM t1 WHERE y REGEXP '\bma[nd]' ORDER BY x;
} {1 2 4}
# "mad" in row 4 is followed by "e", so there is no word boundary after it.
do_execsql_test regexp1-1.21 {
  SELECT x FROM t1 WHERE y REGEXP 'ma[nd]\b' ORDER BY x;
} {1 2}
do_execsql_test regexp1-1.22 {
  SELECT x FROM t1 WHERE y REGEXP 'ma\w' ORDER BY x;
} {1 2 4}
do_execsql_test regexp1-1.23 {
  SELECT x FROM t1 WHERE y REGEXP 'ma\W' ORDER BY x;
} {}
do_execsql_test regexp1-1.24 {
  SELECT x FROM t1 WHERE y REGEXP '\sma\w' ORDER BY x;
} {1 2 4}
do_execsql_test regexp1-1.25 {
  SELECT x FROM t1 WHERE y REGEXP '\Sma\w' ORDER BY x;
} {}
do_execsql_test regexp1-1.26 {
  SELECT x FROM t1 WHERE y REGEXP 'alive\S$' ORDER BY x;
} {4}
# A long alternation assembled with the SQL "||" concatenation operator.
do_execsql_test regexp1-1.27 {
  SELECT x FROM t1 WHERE y REGEXP
       '\b(unto|us|son|given|his|name|called|' ||
       'wonderful|councelor|mighty|god|everlasting|father|' ||
       'prince|peace|alive)\b';
} {4}

# Quantifiers and grouped alternation applied to literal subject strings.
# Each SELECT returns one 0/1 value per REGEXP expression, in order.
do_execsql_test regexp1-2.1 {
  SELECT 'aaaabbbbcccc' REGEXP 'ab*c',
         'aaaacccc' REGEXP 'ab*c';
} {1 1}
do_execsql_test regexp1-2.2 {
  SELECT 'aaaabbbbcccc' REGEXP 'ab+c',
         'aaaacccc' REGEXP 'ab+c';
} {1 0}
do_execsql_test regexp1-2.3 {
  SELECT 'aaaabbbbcccc' REGEXP 'ab?c',
         'aaaacccc' REGEXP 'ab?c';
} {0 1}
# The subjects contain 6, 5, 4, 3, 2 and 1 "b" characters respectively;
# only the counts inside the 3..5 range match.
do_execsql_test regexp1-2.4 {
  SELECT 'aaaabbbbbbcccc' REGEXP 'ab{3,5}c',
         'aaaabbbbbcccc' REGEXP 'ab{3,5}c',
         'aaaabbbbcccc' REGEXP 'ab{3,5}c',
         'aaaabbbcccc' REGEXP 'ab{3,5}c',
         'aaaabbcccc' REGEXP 'ab{3,5}c',
         'aaaabcccc' REGEXP 'ab{3,5}c'
} {0 1 1 1 0 0}
# The 12-character subject leaves exactly 10 characters between the leading
# "a" and the trailing "c" when the pattern is anchored at both ends.
do_execsql_test regexp1-2.5 {
  SELECT 'aaaabbbbcccc' REGEXP 'a(a|b|c)+c',
         'aaaabbbbcccc' REGEXP '^a(a|b|c){11}c$',
         'aaaabbbbcccc' REGEXP '^a(a|b|c){10}c$',
         'aaaabbbbcccc' REGEXP '^a(a|b|c){9}c$'
} {1 0 1 0}
do_execsql_test regexp1-2.6 {
  SELECT 'aaaabbbbcccc' REGEXP '^a(a|bb|c)+c$',
         'aaaabbbbcccc' REGEXP '^a(a|bbb|c)+c$',
         'aaaabbbbcccc' REGEXP '^a(a|bbbb|c)+c$'
} {1 0 1}
do_execsql_test regexp1-2.7 {
  SELECT 'aaaabbbbcccc' REGEXP '^a([ac]+|bb){3}c$',
         'aaaabbbbcccc' REGEXP '^a([ac]+|bb){4}c$',
         'aaaabbbbcccc' REGEXP '^a([ac]+|bb){5}c$'
} {0 1 1}

# Backslash-escaped metacharacters (* + . [ ]) match themselves literally.
do_execsql_test regexp1-2.8 {
  SELECT 'abc*def+ghi.jkl[mno]pqr' REGEXP 'c.d',
         'abc*def+ghi.jkl[mno]pqr' REGEXP 'c\*d',
         'abc*def+ghi.jkl[mno]pqr' REGEXP 'f\+g',
         'abc*def+ghi.jkl[mno]pqr' REGEXP 'i\.j',
         'abc*def+ghi.jkl[mno]pqr' REGEXP 'l\[mno\]p'
} {1 1 1 1 1}

# Control-character escapes in the pattern.  In each test the double-quoted
# Tcl string stored in v1 contains the real control character, while the
# brace-quoted SQL hands the backslash sequence to REGEXP unchanged.
do_test regexp1-2.9 {
  set v1 "abc\ndef"
  db eval {SELECT $v1 REGEXP '^abc\ndef$'}
} {1}
do_test regexp1-2.10 {
  set v1 "abc\adef"
  db eval {SELECT $v1 REGEXP '^abc\adef$'}
} {1}
do_test regexp1-2.11 {
  set v1 "abc\tdef"
  db eval {SELECT $v1 REGEXP '^abc\tdef$'}
} {1}
do_test regexp1-2.12 {
  set v1 "abc\rdef"
  db eval {SELECT $v1 REGEXP '^abc\rdef$'}
} {1}
do_test regexp1-2.13 {
  set v1 "abc\fdef"
  db eval {SELECT $v1 REGEXP '^abc\fdef$'}
} {1}
do_test regexp1-2.14 {
  set v1 "abc\vdef"
  db eval {SELECT $v1 REGEXP '^abc\vdef$'}
} {1}
# Every regexp metacharacter can be matched literally when preceded by a
# backslash.
do_execsql_test regexp1-2.15 {
  SELECT 'abc\def' REGEXP '^abc\\def',
         'abc(def' REGEXP '^abc\(def',
         'abc)def' REGEXP '^abc\)def',
         'abc*def' REGEXP '^abc\*def',
         'abc.def' REGEXP '^abc\.def',
         'abc+def' REGEXP '^abc\+def',
         'abc?def' REGEXP '^abc\?def',
         'abc[def' REGEXP '^abc\[def',
         'abc$def' REGEXP '^abc\$',
         '^def' REGEXP '\^def',
         'abc{4}x' REGEXP '^abc\{4\}x$',
         'abc|def' REGEXP '^abc\|def$'
} {1 1 1 1 1 1 1 1 1 1 1 1}

# \uXXXX and \xXX escapes, with lower- and upper-case hex digits, used as
# plain atoms (2.20), inside character classes (2.21) and inside negated
# classes and ranges (2.22).
do_execsql_test regexp1-2.20 {
  SELECT 'abc$¢€xyz' REGEXP '^abc\u0024\u00a2\u20acxyz$',
         'abc$¢€xyz' REGEXP '^abc\u0024\u00A2\u20ACxyz$',
         'abc$¢€xyz' REGEXP '^abc\x24\xa2\u20acxyz$'
} {1 1 1}
do_execsql_test regexp1-2.21 {
  SELECT 'abc$¢€xyz' REGEXP '^abc[\u0024][\u00a2][\u20ac]xyz$',
         'abc$¢€xyz' REGEXP '^abc[\u0024\u00A2\u20AC]{3}xyz$',
         'abc$¢€xyz' REGEXP '^abc[\x24][\xa2\u20ac]+xyz$'
} {1 1 1}
do_execsql_test regexp1-2.22 {
  SELECT 'abc$¢€xyz' REGEXP '^abc[^\u0025-X][^ -\u007f][^\u20ab]xyz$'
} {1}

# 2022-07-03
# https://sqlite.org/forum/forumpost/96692f8ba5
# The REGEXP extension mishandles the prefix search optimization when
# the prefix contains 3-byte UTF8 characters.
#
# The database is reset first, so the extension has to be loaded again and
# t1 is recreated with a different schema than in the regexp1-1.* tests.
reset_db
load_static_extension db regexp
# Sanity check of the fixture: three characters, nine bytes.
do_execsql_test regexp1-3.1 {
  CREATE TABLE t1(id INTEGER PRIMARY KEY, a TEXT);
  INSERT INTO t1(id, a) VALUES(1, '日本語');
  SELECT a, hex(a), length(a) FROM t1;
} {日本語 E697A5E69CACE8AA9E 3}
# "=" and LIKE already find the row; REGEXP must find it too.
do_execsql_test regexp1-3.2 {
  SELECT * FROM t1 WHERE a='日本語';
} {1 日本語}
do_execsql_test regexp1-3.3 {
  SELECT * FROM t1 WHERE a LIKE '日本語';
} {1 日本語}
do_execsql_test regexp1-3.4 {
  SELECT * FROM t1 wHERE a REGEXP '日本語';
} {1 日本語}

# 2022-07-03
# https://sqlite.org/forum/forumpost/96692f8ba5  Issue #2
# The '$' token in REGEXP contained within other elements.
#
# The "b$" alternative may only match at the end of the subject, whichever
# position it occupies inside the group; the "cd" alternative is unanchored.
do_execsql_test regexp1-4.1  {SELECT 'xab' REGEXP 'a(b$|cd)';} {1}
do_execsql_test regexp1-4.1b {SELECT 'xab' REGEXP '(b$|cd)';} {1}
do_execsql_test regexp1-4.2  {SELECT 'xaby' REGEXP 'a(b$|cd)';} {0}
do_execsql_test regexp1-4.3  {SELECT 'xacd' REGEXP 'a(b$|cd)';} {1}
do_execsql_test regexp1-4.4  {SELECT 'xacdy' REGEXP 'a(b$|cd)';} {1}
do_execsql_test regexp1-4.5  {SELECT 'xab' REGEXP 'a(cd|b$)';} {1}
do_execsql_test regexp1-4.6  {SELECT 'xaby' REGEXP 'a(cd|b$)';} {0}
do_execsql_test regexp1-4.7  {SELECT 'xacd' REGEXP 'a(cd|b$)';} {1}
do_execsql_test regexp1-4.8  {SELECT 'xacdy' REGEXP 'a(cd|b$)';} {1}
do_execsql_test regexp1-4.9  {SELECT 'xab' REGEXP 'a(cd|b$|e)';} {1}
do_execsql_test regexp1-4.10 {SELECT 'xaby' REGEXP 'a(cd|b$|e)';} {0}
do_execsql_test regexp1-4.11 {SELECT 'xacd' REGEXP 'a(cd|b$|e)';} {1}
do_execsql_test regexp1-4.12 {SELECT 'xacdy' REGEXP 'a(cd|b$|e)';} {1}

# 2022-07-18
# https://sqlite.org/forum/forumpost/57cbaf1d0e
# Incorrect bytecode for {M,N} when M is zero.
#
# The class [a-z0-9] excludes the upper-case "X", so 5.1 must fail; the
# remaining cases probe the upper bound N of {0,N}.
do_execsql_test regexp1-5.1 {SELECT 'fooX' REGEXP '^[a-z][a-z0-9]{0,30}$';} {0}
do_execsql_test regexp1-5.2 {SELECT 'fooX' REGEXP '^[a-z][a-z0-9]{0,30}X$';} {1}
do_execsql_test regexp1-5.3 {SELECT 'fooX' REGEXP '^[a-z][a-z0-9]{0,2}X$';} {1}
do_execsql_test regexp1-5.4 {SELECT 'foooX' REGEXP '^[a-z][a-z0-9]{0,2}X$';} {0}
do_execsql_test regexp1-5.5 {SELECT 'foooX' REGEXP '^[a-z][a-z0-9]{0,3}X$';} {1}

# 2022-07-18
# https://sqlite.org/forum/forumpost/18f87fdcdf
# Allow "^" to occur inside of "(..)"
#
# In 6.6-6.9 the "^" applies only to the "abc" alternative; "def" may match
# anywhere in the subject.
do_execsql_test regexp1-6.1 {SELECT 'foo' REGEXP '[a-z]';} {1}
do_execsql_test regexp1-6.2 {SELECT 'foo' REGEXP '^[a-z]+$';} {1}
do_execsql_test regexp1-6.3 {SELECT 'foo' REGEXP '^([a-z]+)$';} {1}
do_execsql_test regexp1-6.4 {SELECT 'foo' REGEXP '(^[a-z]+)$';} {1}
do_execsql_test regexp1-6.5 {SELECT 'foo' REGEXP '(^[a-z]+$)';} {1}
do_execsql_test regexp1-6.6 {SELECT 'abc' REGEXP '(^abc|def)';} {1}
do_execsql_test regexp1-6.7 {SELECT 'xabc' REGEXP '(^abc|def)';} {0}
do_execsql_test regexp1-6.8 {SELECT 'def' REGEXP '(^abc|def)';} {1}
do_execsql_test regexp1-6.9 {SELECT 'xdef' REGEXP '(^abc|def)';} {1}

# 2022-11-17
# https://sqlite.org/forum/forumpost/3ffe058b04
#
# Each pattern is a single character built with char(); the subject holds
# that same character between "a" (0x61) and "b" (0x62).  The code points
# chosen sit on or near the boundaries where the UTF-8 encoding length
# changes (0x7ff/0x800 and 0xffff/0x10000), plus the maximum 0x10ffff.
do_execsql_test regexp1-7.1 {
  SELECT char(0x61,0x7ff,0x62) REGEXP char(0x7ff);
} 1
do_execsql_test regexp1-7.2 {
  SELECT char(0x61,0x800,0x62) REGEXP char(0x800);
} 1
do_execsql_test regexp1-7.3 {
  SELECT char(0x61,0xabc,0x62) REGEXP char(0xabc);
} 1
do_execsql_test regexp1-7.4 {
  SELECT char(0x61,0xfff,0x62) REGEXP char(0xfff);
} 1
do_execsql_test regexp1-7.5 {
  SELECT char(0x61,0x1000,0x62) REGEXP char(0x1000);
} 1
do_execsql_test regexp1-7.10 {
  SELECT char(0x61,0xffff,0x62) REGEXP char(0xffff);
} 1
do_execsql_test regexp1-7.11 {
  SELECT char(0x61,0x10000,0x62) REGEXP char(0x10000);
} 1
do_execsql_test regexp1-7.12 {
  SELECT char(0x61,0x10ffff,0x62) REGEXP char(0x10ffff);
} 1


# End of the script: hand control back to the test harness.
# NOTE(review): finish_test is not defined in this file; presumably it comes
# from tester.tcl -- confirm.
finish_test