drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 1 | /* |
| 2 | ** Copyright (c) 1999, 2000 D. Richard Hipp |
| 3 | ** |
| 4 | ** This program is free software; you can redistribute it and/or |
| 5 | ** modify it under the terms of the GNU General Public |
| 6 | ** License as published by the Free Software Foundation; either |
| 7 | ** version 2 of the License, or (at your option) any later version. |
| 8 | ** |
| 9 | ** This program is distributed in the hope that it will be useful, |
| 10 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | ** General Public License for more details. |
| 13 | ** |
| 14 | ** You should have received a copy of the GNU General Public |
| 15 | ** License along with this library; if not, write to the |
| 16 | ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 17 | ** Boston, MA 02111-1307, USA. |
| 18 | ** |
| 19 | ** Author contact information: |
| 20 | ** drh@hwaci.com |
| 21 | ** http://www.hwaci.com/drh/ |
| 22 | ** |
| 23 | ************************************************************************* |
| 24 | ** A tokenizer for SQL |
| 25 | ** |
| 26 | ** This file contains C code that splits an SQL input string up into |
| 27 | ** individual tokens and sends those tokens one-by-one over to the |
| 28 | ** parser for analysis. |
| 29 | ** |
drh | efb7251 | 2000-05-31 20:00:52 +0000 | [diff] [blame^] | 30 | ** $Id: tokenize.c,v 1.5 2000/05/31 20:00:53 drh Exp $ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 31 | */ |
| 32 | #include "sqliteInt.h" |
| 33 | #include <ctype.h> |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 34 | #include <stdlib.h> |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 35 | |
/*
** Every SQL keyword recognized by the tokenizer is described by one
** instance of this structure.  The instances are linked together into
** the buckets of a hash table.
**
** The field order matters: the keyword table below is filled in with
** positional initializers.
*/
typedef struct Keyword {
  char *zName;             /* Text of the keyword */
  int len;                 /* Length of zName in characters */
  int tokenType;           /* Token code reported for this keyword */
  struct Keyword *pNext;   /* Next keyword in the same hash bucket */
} Keyword;
| 47 | |
| 48 | /* |
| 49 | ** These are the keywords |
| 50 | */ |
| 51 | static Keyword aKeywordTable[] = { |
| 52 | { "AND", 0, TK_AND, 0 }, |
| 53 | { "AS", 0, TK_AS, 0 }, |
| 54 | { "ASC", 0, TK_ASC, 0 }, |
| 55 | { "BY", 0, TK_BY, 0 }, |
| 56 | { "CHECK", 0, TK_CHECK, 0 }, |
| 57 | { "CONSTRAINT", 0, TK_CONSTRAINT, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 58 | { "COPY", 0, TK_COPY, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 59 | { "CREATE", 0, TK_CREATE, 0 }, |
| 60 | { "DEFAULT", 0, TK_DEFAULT, 0 }, |
| 61 | { "DELETE", 0, TK_DELETE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 62 | { "DELIMITERS", 0, TK_DELIMITERS, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 63 | { "DESC", 0, TK_DESC, 0 }, |
drh | efb7251 | 2000-05-31 20:00:52 +0000 | [diff] [blame^] | 64 | { "DISTINCT", 0, TK_DISTINCT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 65 | { "DROP", 0, TK_DROP, 0 }, |
| 66 | { "EXPLAIN", 0, TK_EXPLAIN, 0 }, |
| 67 | { "FROM", 0, TK_FROM, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 68 | { "GLOB", 0, TK_GLOB, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 69 | { "INDEX", 0, TK_INDEX, 0 }, |
| 70 | { "INSERT", 0, TK_INSERT, 0 }, |
| 71 | { "INTO", 0, TK_INTO, 0 }, |
| 72 | { "IS", 0, TK_IS, 0 }, |
| 73 | { "ISNULL", 0, TK_ISNULL, 0 }, |
| 74 | { "KEY", 0, TK_KEY, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 75 | { "LIKE", 0, TK_LIKE, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 76 | { "NOT", 0, TK_NOT, 0 }, |
| 77 | { "NOTNULL", 0, TK_NOTNULL, 0 }, |
| 78 | { "NULL", 0, TK_NULL, 0 }, |
| 79 | { "ON", 0, TK_ON, 0 }, |
| 80 | { "OR", 0, TK_OR, 0 }, |
| 81 | { "ORDER", 0, TK_ORDER, 0 }, |
| 82 | { "PRIMARY", 0, TK_PRIMARY, 0 }, |
| 83 | { "SELECT", 0, TK_SELECT, 0 }, |
| 84 | { "SET", 0, TK_SET, 0 }, |
| 85 | { "TABLE", 0, TK_TABLE, 0 }, |
| 86 | { "UNIQUE", 0, TK_UNIQUE, 0 }, |
| 87 | { "UPDATE", 0, TK_UPDATE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 88 | { "USING", 0, TK_USING, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 89 | { "VACUUM", 0, TK_VACUUM, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 90 | { "VALUES", 0, TK_VALUES, 0 }, |
| 91 | { "WHERE", 0, TK_WHERE, 0 }, |
| 92 | }; |
| 93 | |
| 94 | /* |
| 95 | ** This is the hash table |
| 96 | */ |
| 97 | #define KEY_HASH_SIZE 37 |
| 98 | static Keyword *apHashTable[KEY_HASH_SIZE]; |
| 99 | |
| 100 | |
| 101 | /* |
| 102 | ** This function looks up an identifier to determine if it is a |
| 103 | ** keyword. If it is a keyword, the token code of that keyword is |
| 104 | ** returned. If the input is not a keyword, TK_ID is returned. |
| 105 | */ |
| 106 | static int sqliteKeywordCode(const char *z, int n){ |
| 107 | int h; |
| 108 | Keyword *p; |
| 109 | if( aKeywordTable[0].len==0 ){ |
| 110 | /* Initialize the keyword hash table */ |
| 111 | int i; |
| 112 | int n; |
| 113 | n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); |
| 114 | for(i=0; i<n; i++){ |
| 115 | aKeywordTable[i].len = strlen(aKeywordTable[i].zName); |
| 116 | h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len); |
| 117 | h %= KEY_HASH_SIZE; |
| 118 | aKeywordTable[i].pNext = apHashTable[h]; |
| 119 | apHashTable[h] = &aKeywordTable[i]; |
| 120 | } |
| 121 | } |
| 122 | h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE; |
| 123 | for(p=apHashTable[h]; p; p=p->pNext){ |
| 124 | if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){ |
| 125 | return p->tokenType; |
| 126 | } |
| 127 | } |
| 128 | return TK_ID; |
| 129 | } |
| 130 | |
| 131 | /* |
| 132 | ** Return the length of the token that begins at z[0]. Return |
| 133 | ** -1 if the token is (or might be) incomplete. Store the token |
| 134 | ** type in *tokenType before returning. |
| 135 | */ |
| 136 | int sqliteGetToken(const char *z, int *tokenType){ |
| 137 | int i; |
| 138 | switch( *z ){ |
| 139 | case ' ': case '\t': case '\n': case '\f': { |
| 140 | for(i=1; z[i] && isspace(z[i]); i++){} |
| 141 | *tokenType = TK_SPACE; |
| 142 | return i; |
| 143 | } |
| 144 | case '-': { |
| 145 | if( z[1]==0 ) return -1; |
| 146 | if( z[1]=='-' ){ |
| 147 | for(i=2; z[i] && z[i]!='\n'; i++){} |
| 148 | *tokenType = TK_COMMENT; |
| 149 | return i; |
| 150 | } |
| 151 | *tokenType = TK_MINUS; |
| 152 | return 1; |
| 153 | } |
| 154 | case '(': { |
| 155 | *tokenType = TK_LP; |
| 156 | return 1; |
| 157 | } |
| 158 | case ')': { |
| 159 | *tokenType = TK_RP; |
| 160 | return 1; |
| 161 | } |
| 162 | case ';': { |
| 163 | *tokenType = TK_SEMI; |
| 164 | return 1; |
| 165 | } |
| 166 | case '+': { |
| 167 | *tokenType = TK_PLUS; |
| 168 | return 1; |
| 169 | } |
| 170 | case '*': { |
| 171 | *tokenType = TK_STAR; |
| 172 | return 1; |
| 173 | } |
| 174 | case '/': { |
| 175 | *tokenType = TK_SLASH; |
| 176 | return 1; |
| 177 | } |
| 178 | case '=': { |
| 179 | *tokenType = TK_EQ; |
| 180 | return 1 + (z[1]=='='); |
| 181 | } |
| 182 | case '<': { |
| 183 | if( z[1]=='=' ){ |
| 184 | *tokenType = TK_LE; |
| 185 | return 2; |
| 186 | }else if( z[1]=='>' ){ |
| 187 | *tokenType = TK_NE; |
| 188 | return 2; |
| 189 | }else{ |
| 190 | *tokenType = TK_LT; |
| 191 | return 1; |
| 192 | } |
| 193 | } |
| 194 | case '>': { |
| 195 | if( z[1]=='=' ){ |
| 196 | *tokenType = TK_GE; |
| 197 | return 2; |
| 198 | }else{ |
| 199 | *tokenType = TK_GT; |
| 200 | return 1; |
| 201 | } |
| 202 | } |
| 203 | case '!': { |
| 204 | if( z[1]!='=' ){ |
| 205 | *tokenType = TK_ILLEGAL; |
| 206 | return 1; |
| 207 | }else{ |
| 208 | *tokenType = TK_NE; |
| 209 | return 2; |
| 210 | } |
| 211 | } |
| 212 | case ',': { |
| 213 | *tokenType = TK_COMMA; |
| 214 | return 1; |
| 215 | } |
| 216 | case '\'': case '"': { |
| 217 | int delim = z[0]; |
| 218 | for(i=1; z[i]; i++){ |
| 219 | if( z[i]==delim ){ |
| 220 | if( z[i+1]==delim ){ |
| 221 | i++; |
| 222 | }else{ |
| 223 | break; |
| 224 | } |
| 225 | } |
| 226 | } |
| 227 | if( z[i] ) i++; |
| 228 | *tokenType = TK_STRING; |
| 229 | return i; |
| 230 | } |
| 231 | case '.': { |
| 232 | if( !isdigit(z[1]) ){ |
| 233 | *tokenType = TK_DOT; |
| 234 | return 1; |
| 235 | } |
| 236 | /* Fall thru into the next case */ |
| 237 | } |
| 238 | case '0': case '1': case '2': case '3': case '4': |
| 239 | case '5': case '6': case '7': case '8': case '9': { |
| 240 | for(i=1; z[i] && isdigit(z[i]); i++){} |
| 241 | if( z[i]=='.' ){ |
| 242 | i++; |
| 243 | while( z[i] && isdigit(z[i]) ){ i++; } |
| 244 | if( (z[i]=='e' || z[i]=='E') && |
| 245 | ( isdigit(z[i+1]) |
| 246 | || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
| 247 | ) |
| 248 | ){ |
| 249 | i += 2; |
| 250 | while( z[i] && isdigit(z[i]) ){ i++; } |
| 251 | } |
| 252 | *tokenType = TK_FLOAT; |
| 253 | }else if( z[0]=='.' ){ |
| 254 | *tokenType = TK_FLOAT; |
| 255 | }else{ |
| 256 | *tokenType = TK_INTEGER; |
| 257 | } |
| 258 | return i; |
| 259 | } |
| 260 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
| 261 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': |
| 262 | case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': |
| 263 | case 's': case 't': case 'u': case 'v': case 'w': case 'x': |
| 264 | case 'y': case 'z': case '_': |
| 265 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
| 266 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': |
| 267 | case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': |
| 268 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': |
| 269 | case 'Y': case 'Z': { |
| 270 | for(i=1; z[i] && (isalnum(z[i]) || z[i]=='_'); i++){} |
| 271 | *tokenType = sqliteKeywordCode(z, i); |
| 272 | return i; |
| 273 | } |
| 274 | default: { |
| 275 | break; |
| 276 | } |
| 277 | } |
| 278 | *tokenType = TK_ILLEGAL; |
| 279 | return 1; |
| 280 | } |
| 281 | |
| 282 | /* |
| 283 | ** Run the parser on the given SQL string. The parser structure is |
| 284 | ** passed in. Return the number of errors. |
| 285 | */ |
| 286 | int sqliteRunParser(Parse *pParse, char *zSql, char **pzErrMsg){ |
| 287 | int nErr = 0; |
| 288 | int i; |
| 289 | void *pEngine; |
| 290 | int once = 1; |
| 291 | static FILE *trace = 0; |
| 292 | extern void *sqliteParserAlloc(void*(*)(int)); |
| 293 | extern void sqliteParserFree(void*, void(*)(void*)); |
| 294 | extern int sqliteParser(void*, int, ...); |
| 295 | extern void sqliteParserTrace(FILE*, char *); |
| 296 | |
| 297 | i = 0; |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 298 | pEngine = sqliteParserAlloc((void*(*)(int))malloc); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 299 | if( pEngine==0 ){ |
| 300 | sqliteSetString(pzErrMsg, "out of memory", 0); |
| 301 | return 1; |
| 302 | } |
| 303 | sqliteParserTrace(trace, "parser: "); |
| 304 | while( nErr==0 && i>=0 && zSql[i]!=0 ){ |
| 305 | int tokenType; |
| 306 | |
| 307 | pParse->sLastToken.z = &zSql[i]; |
| 308 | pParse->sLastToken.n = sqliteGetToken(&zSql[i], &tokenType); |
| 309 | i += pParse->sLastToken.n; |
| 310 | if( once ){ |
| 311 | pParse->sFirstToken = pParse->sLastToken; |
| 312 | once = 0; |
| 313 | } |
| 314 | switch( tokenType ){ |
| 315 | case TK_SPACE: |
| 316 | break; |
| 317 | case TK_COMMENT: { |
| 318 | /* Various debugging modes can be turned on and off using |
| 319 | ** special SQL comments. Check for the special comments |
| 320 | ** here and take approriate action if found. |
| 321 | */ |
| 322 | char *z = pParse->sLastToken.z; |
| 323 | if( sqliteStrNICmp(z,"--parser-trace-on--",19)==0 ){ |
| 324 | trace = stderr; |
| 325 | sqliteParserTrace(trace, "parser: "); |
| 326 | }else if( sqliteStrNICmp(z,"--parser-trace-off--", 20)==0 ){ |
| 327 | trace = 0; |
| 328 | sqliteParserTrace(trace, "parser: "); |
| 329 | }else if( sqliteStrNICmp(z,"--vdbe-trace-on--",17)==0 ){ |
| 330 | pParse->db->flags |= SQLITE_VdbeTrace; |
| 331 | }else if( sqliteStrNICmp(z,"--vdbe-trace-off--", 19)==0 ){ |
| 332 | pParse->db->flags &= ~SQLITE_VdbeTrace; |
| 333 | } |
| 334 | break; |
| 335 | } |
| 336 | case TK_ILLEGAL: |
| 337 | sqliteSetNString(pzErrMsg, "illegal token: \"", -1, |
| 338 | pParse->sLastToken.z, pParse->sLastToken.n, 0); |
| 339 | nErr++; |
| 340 | break; |
| 341 | default: |
| 342 | sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse); |
| 343 | if( pParse->zErrMsg ){ |
| 344 | sqliteSetNString(pzErrMsg, "near \"", -1, |
| 345 | pParse->sErrToken.z, pParse->sErrToken.n, |
| 346 | "\": ", -1, |
| 347 | pParse->zErrMsg, -1, |
| 348 | 0); |
| 349 | nErr++; |
| 350 | } |
| 351 | break; |
| 352 | } |
| 353 | } |
| 354 | if( nErr==0 ){ |
| 355 | sqliteParser(pEngine, 0, pParse->sLastToken, pParse); |
| 356 | if( pParse->zErrMsg ){ |
| 357 | sqliteSetNString(pzErrMsg, "near \"", -1, |
| 358 | pParse->sErrToken.z, pParse->sErrToken.n, |
| 359 | "\": ", -1, |
| 360 | pParse->zErrMsg, -1, |
| 361 | 0); |
| 362 | nErr++; |
| 363 | } |
| 364 | } |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 365 | sqliteParserFree(pEngine, free); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 366 | if( pParse->zErrMsg ){ |
| 367 | if( pzErrMsg ){ |
| 368 | *pzErrMsg = pParse->zErrMsg; |
| 369 | }else{ |
| 370 | sqliteFree(pParse->zErrMsg); |
| 371 | } |
| 372 | if( !nErr ) nErr++; |
| 373 | } |
| 374 | if( pParse->pVdbe ){ |
| 375 | sqliteVdbeDelete(pParse->pVdbe); |
| 376 | pParse->pVdbe = 0; |
| 377 | } |
| 378 | if( pParse->pNewTable ){ |
| 379 | sqliteDeleteTable(pParse->db, pParse->pNewTable); |
| 380 | pParse->pNewTable = 0; |
| 381 | } |
| 382 | return nErr; |
| 383 | } |