/*
** Copyright (c) 1999, 2000 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public
** License as published by the Free Software Foundation; either
** version 2 of the License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
**
** You should have received a copy of the GNU General Public
** License along with this library; if not, write to the
** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.14 2000/10/16 22:06:42 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/*
** Every SQL keyword is described by one instance of this structure.
** The instances double as hash-table entries: keywords that land in
** the same hash bucket are chained together through pNext.
**
** The field order matters because the keyword table below uses
** positional initializers.
*/
typedef struct Keyword {
  char *zName;             /* Text of the keyword */
  int len;                 /* Length of zName in characters */
  int tokenType;           /* Token code reported for this keyword */
  struct Keyword *pNext;   /* Next keyword in the same hash bucket */
} Keyword;

48/*
49** These are the keywords
50*/
51static Keyword aKeywordTable[] = {
drhfef52082000-06-06 01:50:43 +000052 { "ALL", 0, TK_ALL, 0 },
drh75897232000-05-29 14:26:00 +000053 { "AND", 0, TK_AND, 0 },
54 { "AS", 0, TK_AS, 0 },
55 { "ASC", 0, TK_ASC, 0 },
drhfef52082000-06-06 01:50:43 +000056 { "BETWEEN", 0, TK_BETWEEN, 0 },
drh75897232000-05-29 14:26:00 +000057 { "BY", 0, TK_BY, 0 },
58 { "CHECK", 0, TK_CHECK, 0 },
59 { "CONSTRAINT", 0, TK_CONSTRAINT, 0 },
drh982cef72000-05-30 16:27:03 +000060 { "COPY", 0, TK_COPY, 0 },
drh75897232000-05-29 14:26:00 +000061 { "CREATE", 0, TK_CREATE, 0 },
62 { "DEFAULT", 0, TK_DEFAULT, 0 },
63 { "DELETE", 0, TK_DELETE, 0 },
drh982cef72000-05-30 16:27:03 +000064 { "DELIMITERS", 0, TK_DELIMITERS, 0 },
drh75897232000-05-29 14:26:00 +000065 { "DESC", 0, TK_DESC, 0 },
drhefb72512000-05-31 20:00:52 +000066 { "DISTINCT", 0, TK_DISTINCT, 0 },
drh75897232000-05-29 14:26:00 +000067 { "DROP", 0, TK_DROP, 0 },
drh82c3d632000-06-06 21:56:07 +000068 { "EXCEPT", 0, TK_EXCEPT, 0 },
drh75897232000-05-29 14:26:00 +000069 { "EXPLAIN", 0, TK_EXPLAIN, 0 },
70 { "FROM", 0, TK_FROM, 0 },
drhdce2cbe2000-05-31 02:27:49 +000071 { "GLOB", 0, TK_GLOB, 0 },
drh22827922000-06-06 17:27:05 +000072 { "GROUP", 0, TK_GROUP, 0 },
73 { "HAVING", 0, TK_HAVING, 0 },
drhfef52082000-06-06 01:50:43 +000074 { "IN", 0, TK_IN, 0 },
drh75897232000-05-29 14:26:00 +000075 { "INDEX", 0, TK_INDEX, 0 },
76 { "INSERT", 0, TK_INSERT, 0 },
drh82c3d632000-06-06 21:56:07 +000077 { "INTERSECT", 0, TK_INTERSECT, 0 },
drh75897232000-05-29 14:26:00 +000078 { "INTO", 0, TK_INTO, 0 },
79 { "IS", 0, TK_IS, 0 },
80 { "ISNULL", 0, TK_ISNULL, 0 },
81 { "KEY", 0, TK_KEY, 0 },
drhdce2cbe2000-05-31 02:27:49 +000082 { "LIKE", 0, TK_LIKE, 0 },
drh75897232000-05-29 14:26:00 +000083 { "NOT", 0, TK_NOT, 0 },
84 { "NOTNULL", 0, TK_NOTNULL, 0 },
85 { "NULL", 0, TK_NULL, 0 },
86 { "ON", 0, TK_ON, 0 },
87 { "OR", 0, TK_OR, 0 },
88 { "ORDER", 0, TK_ORDER, 0 },
89 { "PRIMARY", 0, TK_PRIMARY, 0 },
90 { "SELECT", 0, TK_SELECT, 0 },
91 { "SET", 0, TK_SET, 0 },
92 { "TABLE", 0, TK_TABLE, 0 },
drh82c3d632000-06-06 21:56:07 +000093 { "UNION", 0, TK_UNION, 0 },
drh75897232000-05-29 14:26:00 +000094 { "UNIQUE", 0, TK_UNIQUE, 0 },
95 { "UPDATE", 0, TK_UPDATE, 0 },
drh982cef72000-05-30 16:27:03 +000096 { "USING", 0, TK_USING, 0 },
drhdce2cbe2000-05-31 02:27:49 +000097 { "VACUUM", 0, TK_VACUUM, 0 },
drh75897232000-05-29 14:26:00 +000098 { "VALUES", 0, TK_VALUES, 0 },
99 { "WHERE", 0, TK_WHERE, 0 },
100};
101
102/*
103** This is the hash table
104*/
105#define KEY_HASH_SIZE 37
106static Keyword *apHashTable[KEY_HASH_SIZE];
107
108
109/*
110** This function looks up an identifier to determine if it is a
111** keyword. If it is a keyword, the token code of that keyword is
112** returned. If the input is not a keyword, TK_ID is returned.
113*/
114static int sqliteKeywordCode(const char *z, int n){
115 int h;
116 Keyword *p;
117 if( aKeywordTable[0].len==0 ){
118 /* Initialize the keyword hash table */
119 int i;
120 int n;
121 n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
122 for(i=0; i<n; i++){
123 aKeywordTable[i].len = strlen(aKeywordTable[i].zName);
124 h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len);
125 h %= KEY_HASH_SIZE;
126 aKeywordTable[i].pNext = apHashTable[h];
127 apHashTable[h] = &aKeywordTable[i];
128 }
129 }
130 h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE;
131 for(p=apHashTable[h]; p; p=p->pNext){
132 if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){
133 return p->tokenType;
134 }
135 }
136 return TK_ID;
137}
138
139/*
140** Return the length of the token that begins at z[0]. Return
141** -1 if the token is (or might be) incomplete. Store the token
142** type in *tokenType before returning.
143*/
144int sqliteGetToken(const char *z, int *tokenType){
145 int i;
146 switch( *z ){
drh30cab802000-08-09 17:17:25 +0000147 case ' ': case '\t': case '\n': case '\f': case '\r': {
drh75897232000-05-29 14:26:00 +0000148 for(i=1; z[i] && isspace(z[i]); i++){}
149 *tokenType = TK_SPACE;
150 return i;
151 }
152 case '-': {
153 if( z[1]==0 ) return -1;
154 if( z[1]=='-' ){
155 for(i=2; z[i] && z[i]!='\n'; i++){}
156 *tokenType = TK_COMMENT;
157 return i;
158 }
159 *tokenType = TK_MINUS;
160 return 1;
161 }
162 case '(': {
163 *tokenType = TK_LP;
164 return 1;
165 }
166 case ')': {
167 *tokenType = TK_RP;
168 return 1;
169 }
170 case ';': {
171 *tokenType = TK_SEMI;
172 return 1;
173 }
174 case '+': {
175 *tokenType = TK_PLUS;
176 return 1;
177 }
178 case '*': {
179 *tokenType = TK_STAR;
180 return 1;
181 }
182 case '/': {
183 *tokenType = TK_SLASH;
184 return 1;
185 }
186 case '=': {
187 *tokenType = TK_EQ;
188 return 1 + (z[1]=='=');
189 }
190 case '<': {
191 if( z[1]=='=' ){
192 *tokenType = TK_LE;
193 return 2;
194 }else if( z[1]=='>' ){
195 *tokenType = TK_NE;
196 return 2;
197 }else{
198 *tokenType = TK_LT;
199 return 1;
200 }
201 }
202 case '>': {
203 if( z[1]=='=' ){
204 *tokenType = TK_GE;
205 return 2;
206 }else{
207 *tokenType = TK_GT;
208 return 1;
209 }
210 }
211 case '!': {
212 if( z[1]!='=' ){
213 *tokenType = TK_ILLEGAL;
drhc837e702000-06-08 16:26:24 +0000214 return 2;
drh75897232000-05-29 14:26:00 +0000215 }else{
216 *tokenType = TK_NE;
217 return 2;
218 }
219 }
drh00400772000-06-16 20:51:26 +0000220 case '|': {
221 if( z[1]!='|' ){
222 *tokenType = TK_ILLEGAL;
223 return 1;
224 }else{
225 *tokenType = TK_CONCAT;
226 return 2;
227 }
228 }
drh75897232000-05-29 14:26:00 +0000229 case ',': {
230 *tokenType = TK_COMMA;
231 return 1;
232 }
233 case '\'': case '"': {
234 int delim = z[0];
235 for(i=1; z[i]; i++){
236 if( z[i]==delim ){
237 if( z[i+1]==delim ){
238 i++;
239 }else{
240 break;
241 }
242 }
243 }
244 if( z[i] ) i++;
245 *tokenType = TK_STRING;
246 return i;
247 }
248 case '.': {
249 if( !isdigit(z[1]) ){
250 *tokenType = TK_DOT;
251 return 1;
252 }
253 /* Fall thru into the next case */
254 }
255 case '0': case '1': case '2': case '3': case '4':
256 case '5': case '6': case '7': case '8': case '9': {
drhc837e702000-06-08 16:26:24 +0000257 *tokenType = TK_INTEGER;
drh75897232000-05-29 14:26:00 +0000258 for(i=1; z[i] && isdigit(z[i]); i++){}
259 if( z[i]=='.' ){
260 i++;
261 while( z[i] && isdigit(z[i]) ){ i++; }
drhc837e702000-06-08 16:26:24 +0000262 *tokenType = TK_FLOAT;
263 }
264 if( (z[i]=='e' || z[i]=='E') &&
drh75897232000-05-29 14:26:00 +0000265 ( isdigit(z[i+1])
266 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
267 )
drhc837e702000-06-08 16:26:24 +0000268 ){
269 i += 2;
270 while( z[i] && isdigit(z[i]) ){ i++; }
drh75897232000-05-29 14:26:00 +0000271 *tokenType = TK_FLOAT;
272 }else if( z[0]=='.' ){
273 *tokenType = TK_FLOAT;
drh75897232000-05-29 14:26:00 +0000274 }
275 return i;
276 }
277 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
278 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
279 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
280 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
281 case 'y': case 'z': case '_':
282 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
283 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
284 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
285 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
286 case 'Y': case 'Z': {
287 for(i=1; z[i] && (isalnum(z[i]) || z[i]=='_'); i++){}
288 *tokenType = sqliteKeywordCode(z, i);
289 return i;
290 }
291 default: {
292 break;
293 }
294 }
295 *tokenType = TK_ILLEGAL;
296 return 1;
297}
298
299/*
300** Run the parser on the given SQL string. The parser structure is
drh4c504392000-10-16 22:06:40 +0000301** passed in. An SQLITE_ status code.
drh75897232000-05-29 14:26:00 +0000302*/
303int sqliteRunParser(Parse *pParse, char *zSql, char **pzErrMsg){
304 int nErr = 0;
305 int i;
306 void *pEngine;
307 int once = 1;
308 static FILE *trace = 0;
309 extern void *sqliteParserAlloc(void*(*)(int));
310 extern void sqliteParserFree(void*, void(*)(void*));
311 extern int sqliteParser(void*, int, ...);
312 extern void sqliteParserTrace(FILE*, char *);
313
drh4c504392000-10-16 22:06:40 +0000314 pParse->db->flags &= ~SQLITE_Interrupt;
315 pParse->rc = SQLITE_OK;
drh75897232000-05-29 14:26:00 +0000316 i = 0;
drh22827922000-06-06 17:27:05 +0000317 sqliteParseInfoReset(pParse);
drh982cef72000-05-30 16:27:03 +0000318 pEngine = sqliteParserAlloc((void*(*)(int))malloc);
drh75897232000-05-29 14:26:00 +0000319 if( pEngine==0 ){
320 sqliteSetString(pzErrMsg, "out of memory", 0);
321 return 1;
322 }
323 sqliteParserTrace(trace, "parser: ");
324 while( nErr==0 && i>=0 && zSql[i]!=0 ){
325 int tokenType;
326
drh4c504392000-10-16 22:06:40 +0000327 if( (pParse->db->flags & SQLITE_Interrupt)!=0 ){
328 pParse->rc = SQLITE_INTERRUPT;
329 sqliteSetString(pzErrMsg, "interrupt", 0);
330 break;
331 }
drh75897232000-05-29 14:26:00 +0000332 pParse->sLastToken.z = &zSql[i];
333 pParse->sLastToken.n = sqliteGetToken(&zSql[i], &tokenType);
334 i += pParse->sLastToken.n;
335 if( once ){
336 pParse->sFirstToken = pParse->sLastToken;
337 once = 0;
338 }
339 switch( tokenType ){
340 case TK_SPACE:
341 break;
342 case TK_COMMENT: {
343 /* Various debugging modes can be turned on and off using
344 ** special SQL comments. Check for the special comments
345 ** here and take approriate action if found.
346 */
drh6e142f52000-06-08 13:36:40 +0000347#ifndef NDEBUG
drh75897232000-05-29 14:26:00 +0000348 char *z = pParse->sLastToken.z;
349 if( sqliteStrNICmp(z,"--parser-trace-on--",19)==0 ){
350 trace = stderr;
351 sqliteParserTrace(trace, "parser: ");
352 }else if( sqliteStrNICmp(z,"--parser-trace-off--", 20)==0 ){
353 trace = 0;
354 sqliteParserTrace(trace, "parser: ");
355 }else if( sqliteStrNICmp(z,"--vdbe-trace-on--",17)==0 ){
356 pParse->db->flags |= SQLITE_VdbeTrace;
drh670f74f2000-06-07 02:04:22 +0000357 }else if( sqliteStrNICmp(z,"--vdbe-trace-off--", 18)==0 ){
drh75897232000-05-29 14:26:00 +0000358 pParse->db->flags &= ~SQLITE_VdbeTrace;
drh6e142f52000-06-08 13:36:40 +0000359#ifdef MEMORY_DEBUG
drh30cab802000-08-09 17:17:25 +0000360 }else if( sqliteStrNICmp(z,"--malloc-fail=",14)==0 ){
drh6e142f52000-06-08 13:36:40 +0000361 sqlite_iMallocFail = atoi(&z[14]);
362 }else if( sqliteStrNICmp(z,"--malloc-stats--", 16)==0 ){
363 if( pParse->xCallback ){
364 static char *azName[4] = {"malloc", "free", "to_fail", 0 };
365 char *azArg[4];
366 char zVal[3][30];
367 sprintf(zVal[0],"%d", sqlite_nMalloc);
368 sprintf(zVal[1],"%d", sqlite_nFree);
369 sprintf(zVal[2],"%d", sqlite_iMallocFail);
370 azArg[0] = zVal[0];
371 azArg[1] = zVal[1];
372 azArg[2] = zVal[2];
373 azArg[3] = 0;
374 pParse->xCallback(pParse->pArg, 3, azArg, azName);
375 }
376#endif
drh75897232000-05-29 14:26:00 +0000377 }
drh6e142f52000-06-08 13:36:40 +0000378#endif
drh75897232000-05-29 14:26:00 +0000379 break;
380 }
381 case TK_ILLEGAL:
drhc837e702000-06-08 16:26:24 +0000382 sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1,
383 pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
drh75897232000-05-29 14:26:00 +0000384 nErr++;
385 break;
386 default:
387 sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
drh6e142f52000-06-08 13:36:40 +0000388 if( pParse->zErrMsg && pParse->sErrToken.z ){
drh75897232000-05-29 14:26:00 +0000389 sqliteSetNString(pzErrMsg, "near \"", -1,
390 pParse->sErrToken.z, pParse->sErrToken.n,
391 "\": ", -1,
392 pParse->zErrMsg, -1,
393 0);
394 nErr++;
drh6e142f52000-06-08 13:36:40 +0000395 sqliteFree(pParse->zErrMsg);
396 pParse->zErrMsg = 0;
drh75897232000-05-29 14:26:00 +0000397 }
398 break;
399 }
400 }
drh4c504392000-10-16 22:06:40 +0000401 if( nErr==0 && (pParse->db->flags & SQLITE_Interrupt)==0 ){
drh75897232000-05-29 14:26:00 +0000402 sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
drh6e142f52000-06-08 13:36:40 +0000403 if( pParse->zErrMsg && pParse->sErrToken.z ){
drh75897232000-05-29 14:26:00 +0000404 sqliteSetNString(pzErrMsg, "near \"", -1,
405 pParse->sErrToken.z, pParse->sErrToken.n,
406 "\": ", -1,
407 pParse->zErrMsg, -1,
408 0);
409 nErr++;
drh6e142f52000-06-08 13:36:40 +0000410 sqliteFree(pParse->zErrMsg);
411 pParse->zErrMsg = 0;
drh75897232000-05-29 14:26:00 +0000412 }
413 }
drhdcc581c2000-05-30 13:44:19 +0000414 sqliteParserFree(pEngine, free);
drh75897232000-05-29 14:26:00 +0000415 if( pParse->zErrMsg ){
416 if( pzErrMsg ){
drh6e142f52000-06-08 13:36:40 +0000417 sqliteFree(*pzErrMsg);
drh75897232000-05-29 14:26:00 +0000418 *pzErrMsg = pParse->zErrMsg;
419 }else{
420 sqliteFree(pParse->zErrMsg);
421 }
422 if( !nErr ) nErr++;
423 }
424 if( pParse->pVdbe ){
425 sqliteVdbeDelete(pParse->pVdbe);
426 pParse->pVdbe = 0;
427 }
428 if( pParse->pNewTable ){
429 sqliteDeleteTable(pParse->db, pParse->pNewTable);
430 pParse->pNewTable = 0;
431 }
drh22827922000-06-06 17:27:05 +0000432 sqliteParseInfoReset(pParse);
drh4c504392000-10-16 22:06:40 +0000433 sqliteStrRealloc(pzErrMsg);
434 if( nErr>0 && pParse->rc==SQLITE_OK ){
435 pParse->rc = SQLITE_ERROR;
436 }
drh75897232000-05-29 14:26:00 +0000437 return nErr;
438}