/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.22 2001/09/16 00:13:27 drh Exp $
*/
20#include "sqliteInt.h"
21#include <ctype.h>
drhdcc581c2000-05-30 13:44:19 +000022#include <stdlib.h>
drh75897232000-05-29 14:26:00 +000023
/*
** All the keywords of the SQL language are stored in a hash
** table composed of instances of the following structure.
**
** zName points at a string literal and must never be written through,
** hence the const qualifier.  Field order matters: the keyword table
** uses positional initializers.
*/
typedef struct Keyword Keyword;
struct Keyword {
  const char *zName;       /* The keyword name (read-only text) */
  int len;                 /* Number of characters in the keyword */
  int tokenType;           /* The token value for this keyword */
  Keyword *pNext;          /* Next keyword with the same hash */
};
35
36/*
37** These are the keywords
38*/
39static Keyword aKeywordTable[] = {
drhfef52082000-06-06 01:50:43 +000040 { "ALL", 0, TK_ALL, 0 },
drh75897232000-05-29 14:26:00 +000041 { "AND", 0, TK_AND, 0 },
42 { "AS", 0, TK_AS, 0 },
43 { "ASC", 0, TK_ASC, 0 },
drhc4a3c772001-04-04 11:48:57 +000044 { "BEGIN", 0, TK_BEGIN, 0 },
drhfef52082000-06-06 01:50:43 +000045 { "BETWEEN", 0, TK_BETWEEN, 0 },
drh75897232000-05-29 14:26:00 +000046 { "BY", 0, TK_BY, 0 },
47 { "CHECK", 0, TK_CHECK, 0 },
drhf57b14a2001-09-14 18:54:08 +000048 { "CLUSTER", 0, TK_CLUSTER, 0 },
drhc4a3c772001-04-04 11:48:57 +000049 { "COMMIT", 0, TK_COMMIT, 0 },
drh75897232000-05-29 14:26:00 +000050 { "CONSTRAINT", 0, TK_CONSTRAINT, 0 },
drh982cef72000-05-30 16:27:03 +000051 { "COPY", 0, TK_COPY, 0 },
drh75897232000-05-29 14:26:00 +000052 { "CREATE", 0, TK_CREATE, 0 },
53 { "DEFAULT", 0, TK_DEFAULT, 0 },
54 { "DELETE", 0, TK_DELETE, 0 },
drh982cef72000-05-30 16:27:03 +000055 { "DELIMITERS", 0, TK_DELIMITERS, 0 },
drh75897232000-05-29 14:26:00 +000056 { "DESC", 0, TK_DESC, 0 },
drhefb72512000-05-31 20:00:52 +000057 { "DISTINCT", 0, TK_DISTINCT, 0 },
drh75897232000-05-29 14:26:00 +000058 { "DROP", 0, TK_DROP, 0 },
drhc4a3c772001-04-04 11:48:57 +000059 { "END", 0, TK_END, 0 },
drh82c3d632000-06-06 21:56:07 +000060 { "EXCEPT", 0, TK_EXCEPT, 0 },
drh75897232000-05-29 14:26:00 +000061 { "EXPLAIN", 0, TK_EXPLAIN, 0 },
62 { "FROM", 0, TK_FROM, 0 },
drhdce2cbe2000-05-31 02:27:49 +000063 { "GLOB", 0, TK_GLOB, 0 },
drh22827922000-06-06 17:27:05 +000064 { "GROUP", 0, TK_GROUP, 0 },
65 { "HAVING", 0, TK_HAVING, 0 },
drhfef52082000-06-06 01:50:43 +000066 { "IN", 0, TK_IN, 0 },
drh75897232000-05-29 14:26:00 +000067 { "INDEX", 0, TK_INDEX, 0 },
68 { "INSERT", 0, TK_INSERT, 0 },
drh82c3d632000-06-06 21:56:07 +000069 { "INTERSECT", 0, TK_INTERSECT, 0 },
drh75897232000-05-29 14:26:00 +000070 { "INTO", 0, TK_INTO, 0 },
71 { "IS", 0, TK_IS, 0 },
72 { "ISNULL", 0, TK_ISNULL, 0 },
73 { "KEY", 0, TK_KEY, 0 },
drhdce2cbe2000-05-31 02:27:49 +000074 { "LIKE", 0, TK_LIKE, 0 },
drh75897232000-05-29 14:26:00 +000075 { "NOT", 0, TK_NOT, 0 },
76 { "NOTNULL", 0, TK_NOTNULL, 0 },
77 { "NULL", 0, TK_NULL, 0 },
78 { "ON", 0, TK_ON, 0 },
79 { "OR", 0, TK_OR, 0 },
80 { "ORDER", 0, TK_ORDER, 0 },
drhf57b14a2001-09-14 18:54:08 +000081 { "PRAGMA", 0, TK_PRAGMA, 0 },
drh75897232000-05-29 14:26:00 +000082 { "PRIMARY", 0, TK_PRIMARY, 0 },
drhc4a3c772001-04-04 11:48:57 +000083 { "ROLLBACK", 0, TK_ROLLBACK, 0 },
drh75897232000-05-29 14:26:00 +000084 { "SELECT", 0, TK_SELECT, 0 },
85 { "SET", 0, TK_SET, 0 },
86 { "TABLE", 0, TK_TABLE, 0 },
drhc4a3c772001-04-04 11:48:57 +000087 { "TRANSACTION", 0, TK_TRANSACTION, 0 },
drh82c3d632000-06-06 21:56:07 +000088 { "UNION", 0, TK_UNION, 0 },
drh75897232000-05-29 14:26:00 +000089 { "UNIQUE", 0, TK_UNIQUE, 0 },
90 { "UPDATE", 0, TK_UPDATE, 0 },
drh982cef72000-05-30 16:27:03 +000091 { "USING", 0, TK_USING, 0 },
drhdce2cbe2000-05-31 02:27:49 +000092 { "VACUUM", 0, TK_VACUUM, 0 },
drh75897232000-05-29 14:26:00 +000093 { "VALUES", 0, TK_VALUES, 0 },
94 { "WHERE", 0, TK_WHERE, 0 },
95};
96
97/*
98** This is the hash table
99*/
drhdaffd0e2001-04-11 14:28:42 +0000100#define KEY_HASH_SIZE 71
drh75897232000-05-29 14:26:00 +0000101static Keyword *apHashTable[KEY_HASH_SIZE];
102
103
104/*
105** This function looks up an identifier to determine if it is a
106** keyword. If it is a keyword, the token code of that keyword is
107** returned. If the input is not a keyword, TK_ID is returned.
108*/
109static int sqliteKeywordCode(const char *z, int n){
110 int h;
111 Keyword *p;
112 if( aKeywordTable[0].len==0 ){
113 /* Initialize the keyword hash table */
114 int i;
115 int n;
116 n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
117 for(i=0; i<n; i++){
118 aKeywordTable[i].len = strlen(aKeywordTable[i].zName);
119 h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len);
120 h %= KEY_HASH_SIZE;
121 aKeywordTable[i].pNext = apHashTable[h];
122 apHashTable[h] = &aKeywordTable[i];
123 }
124 }
125 h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE;
126 for(p=apHashTable[h]; p; p=p->pNext){
127 if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){
128 return p->tokenType;
129 }
130 }
131 return TK_ID;
132}
133
134/*
135** Return the length of the token that begins at z[0]. Return
136** -1 if the token is (or might be) incomplete. Store the token
137** type in *tokenType before returning.
138*/
139int sqliteGetToken(const char *z, int *tokenType){
140 int i;
141 switch( *z ){
drh30cab802000-08-09 17:17:25 +0000142 case ' ': case '\t': case '\n': case '\f': case '\r': {
drh75897232000-05-29 14:26:00 +0000143 for(i=1; z[i] && isspace(z[i]); i++){}
144 *tokenType = TK_SPACE;
145 return i;
146 }
147 case '-': {
148 if( z[1]==0 ) return -1;
149 if( z[1]=='-' ){
150 for(i=2; z[i] && z[i]!='\n'; i++){}
151 *tokenType = TK_COMMENT;
152 return i;
153 }
154 *tokenType = TK_MINUS;
155 return 1;
156 }
157 case '(': {
158 *tokenType = TK_LP;
159 return 1;
160 }
161 case ')': {
162 *tokenType = TK_RP;
163 return 1;
164 }
165 case ';': {
166 *tokenType = TK_SEMI;
167 return 1;
168 }
169 case '+': {
170 *tokenType = TK_PLUS;
171 return 1;
172 }
173 case '*': {
174 *tokenType = TK_STAR;
175 return 1;
176 }
177 case '/': {
178 *tokenType = TK_SLASH;
179 return 1;
180 }
181 case '=': {
182 *tokenType = TK_EQ;
183 return 1 + (z[1]=='=');
184 }
185 case '<': {
186 if( z[1]=='=' ){
187 *tokenType = TK_LE;
188 return 2;
189 }else if( z[1]=='>' ){
190 *tokenType = TK_NE;
191 return 2;
192 }else{
193 *tokenType = TK_LT;
194 return 1;
195 }
196 }
197 case '>': {
198 if( z[1]=='=' ){
199 *tokenType = TK_GE;
200 return 2;
201 }else{
202 *tokenType = TK_GT;
203 return 1;
204 }
205 }
206 case '!': {
207 if( z[1]!='=' ){
208 *tokenType = TK_ILLEGAL;
drhc837e702000-06-08 16:26:24 +0000209 return 2;
drh75897232000-05-29 14:26:00 +0000210 }else{
211 *tokenType = TK_NE;
212 return 2;
213 }
214 }
drh00400772000-06-16 20:51:26 +0000215 case '|': {
216 if( z[1]!='|' ){
217 *tokenType = TK_ILLEGAL;
218 return 1;
219 }else{
220 *tokenType = TK_CONCAT;
221 return 2;
222 }
223 }
drh75897232000-05-29 14:26:00 +0000224 case ',': {
225 *tokenType = TK_COMMA;
226 return 1;
227 }
228 case '\'': case '"': {
229 int delim = z[0];
230 for(i=1; z[i]; i++){
231 if( z[i]==delim ){
232 if( z[i+1]==delim ){
233 i++;
234 }else{
235 break;
236 }
237 }
238 }
239 if( z[i] ) i++;
240 *tokenType = TK_STRING;
241 return i;
242 }
243 case '.': {
244 if( !isdigit(z[1]) ){
245 *tokenType = TK_DOT;
246 return 1;
247 }
248 /* Fall thru into the next case */
249 }
250 case '0': case '1': case '2': case '3': case '4':
251 case '5': case '6': case '7': case '8': case '9': {
drhc837e702000-06-08 16:26:24 +0000252 *tokenType = TK_INTEGER;
drh75897232000-05-29 14:26:00 +0000253 for(i=1; z[i] && isdigit(z[i]); i++){}
254 if( z[i]=='.' ){
255 i++;
256 while( z[i] && isdigit(z[i]) ){ i++; }
drhc837e702000-06-08 16:26:24 +0000257 *tokenType = TK_FLOAT;
258 }
259 if( (z[i]=='e' || z[i]=='E') &&
drh75897232000-05-29 14:26:00 +0000260 ( isdigit(z[i+1])
261 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
262 )
drhc837e702000-06-08 16:26:24 +0000263 ){
264 i += 2;
265 while( z[i] && isdigit(z[i]) ){ i++; }
drh75897232000-05-29 14:26:00 +0000266 *tokenType = TK_FLOAT;
267 }else if( z[0]=='.' ){
268 *tokenType = TK_FLOAT;
drh75897232000-05-29 14:26:00 +0000269 }
270 return i;
271 }
272 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
273 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
274 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
275 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
276 case 'y': case 'z': case '_':
277 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
278 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
279 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
280 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
281 case 'Y': case 'Z': {
282 for(i=1; z[i] && (isalnum(z[i]) || z[i]=='_'); i++){}
283 *tokenType = sqliteKeywordCode(z, i);
284 return i;
285 }
286 default: {
287 break;
288 }
289 }
290 *tokenType = TK_ILLEGAL;
291 return 1;
292}
293
294/*
295** Run the parser on the given SQL string. The parser structure is
drhb19a2bc2001-09-16 00:13:26 +0000296** passed in. An SQLITE_ status code is returned. If an error occurs
297** and pzErrMsg!=NULL then an error message might be written into
298** memory obtained from malloc() and *pzErrMsg made to point to that
299** error message. Or maybe not.
drh75897232000-05-29 14:26:00 +0000300*/
301int sqliteRunParser(Parse *pParse, char *zSql, char **pzErrMsg){
302 int nErr = 0;
303 int i;
304 void *pEngine;
305 int once = 1;
drh75897232000-05-29 14:26:00 +0000306 extern void *sqliteParserAlloc(void*(*)(int));
307 extern void sqliteParserFree(void*, void(*)(void*));
drh338ea132001-02-11 16:56:24 +0000308 extern int sqliteParser(void*, int, Token, Parse*);
drh75897232000-05-29 14:26:00 +0000309
drh4c504392000-10-16 22:06:40 +0000310 pParse->db->flags &= ~SQLITE_Interrupt;
311 pParse->rc = SQLITE_OK;
drh75897232000-05-29 14:26:00 +0000312 i = 0;
drh22827922000-06-06 17:27:05 +0000313 sqliteParseInfoReset(pParse);
drh982cef72000-05-30 16:27:03 +0000314 pEngine = sqliteParserAlloc((void*(*)(int))malloc);
drh75897232000-05-29 14:26:00 +0000315 if( pEngine==0 ){
316 sqliteSetString(pzErrMsg, "out of memory", 0);
317 return 1;
318 }
drhdaffd0e2001-04-11 14:28:42 +0000319 while( sqlite_malloc_failed==0 && nErr==0 && i>=0 && zSql[i]!=0 ){
drh75897232000-05-29 14:26:00 +0000320 int tokenType;
321
drh4c504392000-10-16 22:06:40 +0000322 if( (pParse->db->flags & SQLITE_Interrupt)!=0 ){
323 pParse->rc = SQLITE_INTERRUPT;
324 sqliteSetString(pzErrMsg, "interrupt", 0);
325 break;
326 }
drh75897232000-05-29 14:26:00 +0000327 pParse->sLastToken.z = &zSql[i];
328 pParse->sLastToken.n = sqliteGetToken(&zSql[i], &tokenType);
329 i += pParse->sLastToken.n;
330 if( once ){
331 pParse->sFirstToken = pParse->sLastToken;
332 once = 0;
333 }
334 switch( tokenType ){
335 case TK_SPACE:
336 break;
337 case TK_COMMENT: {
drh75897232000-05-29 14:26:00 +0000338 break;
339 }
340 case TK_ILLEGAL:
drhc837e702000-06-08 16:26:24 +0000341 sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1,
342 pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
drh75897232000-05-29 14:26:00 +0000343 nErr++;
344 break;
345 default:
346 sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
drh6e142f52000-06-08 13:36:40 +0000347 if( pParse->zErrMsg && pParse->sErrToken.z ){
drh75897232000-05-29 14:26:00 +0000348 sqliteSetNString(pzErrMsg, "near \"", -1,
349 pParse->sErrToken.z, pParse->sErrToken.n,
350 "\": ", -1,
351 pParse->zErrMsg, -1,
352 0);
353 nErr++;
drh6e142f52000-06-08 13:36:40 +0000354 sqliteFree(pParse->zErrMsg);
355 pParse->zErrMsg = 0;
drh75897232000-05-29 14:26:00 +0000356 }
357 break;
358 }
359 }
drh4c504392000-10-16 22:06:40 +0000360 if( nErr==0 && (pParse->db->flags & SQLITE_Interrupt)==0 ){
drh75897232000-05-29 14:26:00 +0000361 sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
drh6e142f52000-06-08 13:36:40 +0000362 if( pParse->zErrMsg && pParse->sErrToken.z ){
drh75897232000-05-29 14:26:00 +0000363 sqliteSetNString(pzErrMsg, "near \"", -1,
364 pParse->sErrToken.z, pParse->sErrToken.n,
365 "\": ", -1,
366 pParse->zErrMsg, -1,
367 0);
368 nErr++;
drh6e142f52000-06-08 13:36:40 +0000369 sqliteFree(pParse->zErrMsg);
370 pParse->zErrMsg = 0;
drh75897232000-05-29 14:26:00 +0000371 }
372 }
drhdcc581c2000-05-30 13:44:19 +0000373 sqliteParserFree(pEngine, free);
drh75897232000-05-29 14:26:00 +0000374 if( pParse->zErrMsg ){
375 if( pzErrMsg ){
drh6e142f52000-06-08 13:36:40 +0000376 sqliteFree(*pzErrMsg);
drh75897232000-05-29 14:26:00 +0000377 *pzErrMsg = pParse->zErrMsg;
378 }else{
379 sqliteFree(pParse->zErrMsg);
380 }
381 if( !nErr ) nErr++;
382 }
383 if( pParse->pVdbe ){
384 sqliteVdbeDelete(pParse->pVdbe);
385 pParse->pVdbe = 0;
386 }
387 if( pParse->pNewTable ){
388 sqliteDeleteTable(pParse->db, pParse->pNewTable);
389 pParse->pNewTable = 0;
390 }
drh22827922000-06-06 17:27:05 +0000391 sqliteParseInfoReset(pParse);
drh4c504392000-10-16 22:06:40 +0000392 if( nErr>0 && pParse->rc==SQLITE_OK ){
393 pParse->rc = SQLITE_ERROR;
394 }
drh75897232000-05-29 14:26:00 +0000395 return nErr;
396}