drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 1 | /* |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 2 | ** 2001 September 15 |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 3 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 4 | ** The author disclaims copyright to this source code. In place of |
| 5 | ** a legal notice, here is a blessing: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 6 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 7 | ** May you do good and not evil. |
| 8 | ** May you find forgiveness for yourself and forgive others. |
| 9 | ** May you share freely, never taking more than you give. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 10 | ** |
| 11 | ************************************************************************* |
| 12 | ** A tokenizer for SQL |
| 13 | ** |
| 14 | ** This file contains C code that splits an SQL input string up into |
| 15 | ** individual tokens and sends those tokens one-by-one over to the |
| 16 | ** parser for analysis. |
| 17 | ** |
drh | 6a53534 | 2001-10-19 16:44:56 +0000 | [diff] [blame^] | 18 | ** $Id: tokenize.c,v 1.29 2001/10/19 16:44:57 drh Exp $ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 19 | */ |
| 20 | #include "sqliteInt.h" |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 21 | #include "os.h" |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 22 | #include <ctype.h> |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 23 | #include <stdlib.h> |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 24 | |
/*
** All the keywords of the SQL language are stored in a hash table
** composed of instances of the following structure.  Entries that
** hash to the same bucket are chained together through pNext.
**
** zName is const-qualified because the table below initializes it
** with string literals and nothing in this file writes through it.
*/
typedef struct Keyword Keyword;
struct Keyword {
  const char *zName;       /* The keyword name */
  int len;                 /* Number of characters in the keyword */
  int tokenType;           /* The token value for this keyword */
  Keyword *pNext;          /* Next keyword with the same hash */
};
| 36 | |
| 37 | /* |
| 38 | ** These are the keywords |
| 39 | */ |
| 40 | static Keyword aKeywordTable[] = { |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 41 | { "ALL", 0, TK_ALL, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 42 | { "AND", 0, TK_AND, 0 }, |
| 43 | { "AS", 0, TK_AS, 0 }, |
| 44 | { "ASC", 0, TK_ASC, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 45 | { "BEGIN", 0, TK_BEGIN, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 46 | { "BETWEEN", 0, TK_BETWEEN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 47 | { "BY", 0, TK_BY, 0 }, |
| 48 | { "CHECK", 0, TK_CHECK, 0 }, |
drh | f57b14a | 2001-09-14 18:54:08 +0000 | [diff] [blame] | 49 | { "CLUSTER", 0, TK_CLUSTER, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 50 | { "COMMIT", 0, TK_COMMIT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 51 | { "CONSTRAINT", 0, TK_CONSTRAINT, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 52 | { "COPY", 0, TK_COPY, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 53 | { "CREATE", 0, TK_CREATE, 0 }, |
| 54 | { "DEFAULT", 0, TK_DEFAULT, 0 }, |
| 55 | { "DELETE", 0, TK_DELETE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 56 | { "DELIMITERS", 0, TK_DELIMITERS, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 57 | { "DESC", 0, TK_DESC, 0 }, |
drh | efb7251 | 2000-05-31 20:00:52 +0000 | [diff] [blame] | 58 | { "DISTINCT", 0, TK_DISTINCT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 59 | { "DROP", 0, TK_DROP, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 60 | { "END", 0, TK_END, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 61 | { "EXCEPT", 0, TK_EXCEPT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 62 | { "EXPLAIN", 0, TK_EXPLAIN, 0 }, |
| 63 | { "FROM", 0, TK_FROM, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 64 | { "GLOB", 0, TK_GLOB, 0 }, |
drh | 2282792 | 2000-06-06 17:27:05 +0000 | [diff] [blame] | 65 | { "GROUP", 0, TK_GROUP, 0 }, |
| 66 | { "HAVING", 0, TK_HAVING, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 67 | { "IN", 0, TK_IN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 68 | { "INDEX", 0, TK_INDEX, 0 }, |
| 69 | { "INSERT", 0, TK_INSERT, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 70 | { "INTERSECT", 0, TK_INTERSECT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 71 | { "INTO", 0, TK_INTO, 0 }, |
| 72 | { "IS", 0, TK_IS, 0 }, |
| 73 | { "ISNULL", 0, TK_ISNULL, 0 }, |
| 74 | { "KEY", 0, TK_KEY, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 75 | { "LIKE", 0, TK_LIKE, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 76 | { "NOT", 0, TK_NOT, 0 }, |
| 77 | { "NOTNULL", 0, TK_NOTNULL, 0 }, |
| 78 | { "NULL", 0, TK_NULL, 0 }, |
| 79 | { "ON", 0, TK_ON, 0 }, |
| 80 | { "OR", 0, TK_OR, 0 }, |
| 81 | { "ORDER", 0, TK_ORDER, 0 }, |
drh | f57b14a | 2001-09-14 18:54:08 +0000 | [diff] [blame] | 82 | { "PRAGMA", 0, TK_PRAGMA, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 83 | { "PRIMARY", 0, TK_PRIMARY, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 84 | { "ROLLBACK", 0, TK_ROLLBACK, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 85 | { "SELECT", 0, TK_SELECT, 0 }, |
| 86 | { "SET", 0, TK_SET, 0 }, |
| 87 | { "TABLE", 0, TK_TABLE, 0 }, |
drh | f57b339 | 2001-10-08 13:22:32 +0000 | [diff] [blame] | 88 | { "TEMP", 0, TK_TEMP, 0 }, |
| 89 | { "TEMPORARY", 0, TK_TEMP, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 90 | { "TRANSACTION", 0, TK_TRANSACTION, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 91 | { "UNION", 0, TK_UNION, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 92 | { "UNIQUE", 0, TK_UNIQUE, 0 }, |
| 93 | { "UPDATE", 0, TK_UPDATE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 94 | { "USING", 0, TK_USING, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 95 | { "VACUUM", 0, TK_VACUUM, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 96 | { "VALUES", 0, TK_VALUES, 0 }, |
| 97 | { "WHERE", 0, TK_WHERE, 0 }, |
| 98 | }; |
| 99 | |
| 100 | /* |
| 101 | ** This is the hash table |
| 102 | */ |
drh | daffd0e | 2001-04-11 14:28:42 +0000 | [diff] [blame] | 103 | #define KEY_HASH_SIZE 71 |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 104 | static Keyword *apHashTable[KEY_HASH_SIZE]; |
| 105 | |
| 106 | |
| 107 | /* |
| 108 | ** This function looks up an identifier to determine if it is a |
| 109 | ** keyword. If it is a keyword, the token code of that keyword is |
| 110 | ** returned. If the input is not a keyword, TK_ID is returned. |
| 111 | */ |
| 112 | static int sqliteKeywordCode(const char *z, int n){ |
| 113 | int h; |
| 114 | Keyword *p; |
| 115 | if( aKeywordTable[0].len==0 ){ |
| 116 | /* Initialize the keyword hash table */ |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 117 | sqliteOsEnterMutex(); |
| 118 | if( aKeywordTable[0].len==0 ){ |
| 119 | int i; |
| 120 | int n; |
| 121 | n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); |
| 122 | for(i=0; i<n; i++){ |
| 123 | aKeywordTable[i].len = strlen(aKeywordTable[i].zName); |
| 124 | h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len); |
| 125 | h %= KEY_HASH_SIZE; |
| 126 | aKeywordTable[i].pNext = apHashTable[h]; |
| 127 | apHashTable[h] = &aKeywordTable[i]; |
| 128 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 129 | } |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 130 | sqliteOsLeaveMutex(); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 131 | } |
| 132 | h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE; |
| 133 | for(p=apHashTable[h]; p; p=p->pNext){ |
| 134 | if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){ |
| 135 | return p->tokenType; |
| 136 | } |
| 137 | } |
| 138 | return TK_ID; |
| 139 | } |
| 140 | |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 141 | |
/*
** Lookup table indexed by byte value: isIdChar[X] is 1 when the byte X
** may appear in an identifier and 0 when it may not.
**
** The bytes accepted are the ASCII digits (0x30-0x39), the ASCII
** letters (0x41-0x5A and 0x61-0x7A), the underscore (0x5F), and every
** byte with the high-order bit set (0x80-0xFF).  The last rule means
** that any sequence of UTF-8 characters, or characters taken from an
** extended ISO8859 character set, can form an identifier.
*/
static const char isIdChar[] = {
/*        x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
/* 0x */   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 1x */   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 2x */   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 3x */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* 4x */   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 5x */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
/* 6x */   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 7x */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/* 8x */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 9x */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* Ax */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* Bx */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* Cx */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* Dx */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* Ex */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* Fx */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
| 171 | |
| 172 | |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 173 | /* |
| 174 | ** Return the length of the token that begins at z[0]. Return |
| 175 | ** -1 if the token is (or might be) incomplete. Store the token |
| 176 | ** type in *tokenType before returning. |
| 177 | */ |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 178 | static int sqliteGetToken(const unsigned char *z, int *tokenType){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 179 | int i; |
| 180 | switch( *z ){ |
drh | 30cab80 | 2000-08-09 17:17:25 +0000 | [diff] [blame] | 181 | case ' ': case '\t': case '\n': case '\f': case '\r': { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 182 | for(i=1; z[i] && isspace(z[i]); i++){} |
| 183 | *tokenType = TK_SPACE; |
| 184 | return i; |
| 185 | } |
| 186 | case '-': { |
| 187 | if( z[1]==0 ) return -1; |
| 188 | if( z[1]=='-' ){ |
| 189 | for(i=2; z[i] && z[i]!='\n'; i++){} |
| 190 | *tokenType = TK_COMMENT; |
| 191 | return i; |
| 192 | } |
| 193 | *tokenType = TK_MINUS; |
| 194 | return 1; |
| 195 | } |
| 196 | case '(': { |
| 197 | *tokenType = TK_LP; |
| 198 | return 1; |
| 199 | } |
| 200 | case ')': { |
| 201 | *tokenType = TK_RP; |
| 202 | return 1; |
| 203 | } |
| 204 | case ';': { |
| 205 | *tokenType = TK_SEMI; |
| 206 | return 1; |
| 207 | } |
| 208 | case '+': { |
| 209 | *tokenType = TK_PLUS; |
| 210 | return 1; |
| 211 | } |
| 212 | case '*': { |
| 213 | *tokenType = TK_STAR; |
| 214 | return 1; |
| 215 | } |
| 216 | case '/': { |
| 217 | *tokenType = TK_SLASH; |
| 218 | return 1; |
| 219 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 220 | case '%': { |
| 221 | *tokenType = TK_REM; |
| 222 | return 1; |
| 223 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 224 | case '=': { |
| 225 | *tokenType = TK_EQ; |
| 226 | return 1 + (z[1]=='='); |
| 227 | } |
| 228 | case '<': { |
| 229 | if( z[1]=='=' ){ |
| 230 | *tokenType = TK_LE; |
| 231 | return 2; |
| 232 | }else if( z[1]=='>' ){ |
| 233 | *tokenType = TK_NE; |
| 234 | return 2; |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 235 | }else if( z[1]=='<' ){ |
| 236 | *tokenType = TK_LSHIFT; |
| 237 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 238 | }else{ |
| 239 | *tokenType = TK_LT; |
| 240 | return 1; |
| 241 | } |
| 242 | } |
| 243 | case '>': { |
| 244 | if( z[1]=='=' ){ |
| 245 | *tokenType = TK_GE; |
| 246 | return 2; |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 247 | }else if( z[1]=='>' ){ |
| 248 | *tokenType = TK_RSHIFT; |
| 249 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 250 | }else{ |
| 251 | *tokenType = TK_GT; |
| 252 | return 1; |
| 253 | } |
| 254 | } |
| 255 | case '!': { |
| 256 | if( z[1]!='=' ){ |
| 257 | *tokenType = TK_ILLEGAL; |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 258 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 259 | }else{ |
| 260 | *tokenType = TK_NE; |
| 261 | return 2; |
| 262 | } |
| 263 | } |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 264 | case '|': { |
| 265 | if( z[1]!='|' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 266 | *tokenType = TK_BITOR; |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 267 | return 1; |
| 268 | }else{ |
| 269 | *tokenType = TK_CONCAT; |
| 270 | return 2; |
| 271 | } |
| 272 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 273 | case ',': { |
| 274 | *tokenType = TK_COMMA; |
| 275 | return 1; |
| 276 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 277 | case '&': { |
| 278 | *tokenType = TK_BITAND; |
| 279 | return 1; |
| 280 | } |
| 281 | case '~': { |
| 282 | *tokenType = TK_BITNOT; |
| 283 | return 1; |
| 284 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 285 | case '\'': case '"': { |
| 286 | int delim = z[0]; |
| 287 | for(i=1; z[i]; i++){ |
| 288 | if( z[i]==delim ){ |
| 289 | if( z[i+1]==delim ){ |
| 290 | i++; |
| 291 | }else{ |
| 292 | break; |
| 293 | } |
| 294 | } |
| 295 | } |
| 296 | if( z[i] ) i++; |
| 297 | *tokenType = TK_STRING; |
| 298 | return i; |
| 299 | } |
| 300 | case '.': { |
| 301 | if( !isdigit(z[1]) ){ |
| 302 | *tokenType = TK_DOT; |
| 303 | return 1; |
| 304 | } |
| 305 | /* Fall thru into the next case */ |
| 306 | } |
| 307 | case '0': case '1': case '2': case '3': case '4': |
| 308 | case '5': case '6': case '7': case '8': case '9': { |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 309 | *tokenType = TK_INTEGER; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 310 | for(i=1; z[i] && isdigit(z[i]); i++){} |
| 311 | if( z[i]=='.' ){ |
| 312 | i++; |
| 313 | while( z[i] && isdigit(z[i]) ){ i++; } |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 314 | *tokenType = TK_FLOAT; |
| 315 | } |
| 316 | if( (z[i]=='e' || z[i]=='E') && |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 317 | ( isdigit(z[i+1]) |
| 318 | || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
| 319 | ) |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 320 | ){ |
| 321 | i += 2; |
| 322 | while( z[i] && isdigit(z[i]) ){ i++; } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 323 | *tokenType = TK_FLOAT; |
| 324 | }else if( z[0]=='.' ){ |
| 325 | *tokenType = TK_FLOAT; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 326 | } |
| 327 | return i; |
| 328 | } |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 329 | default: { |
| 330 | if( !isIdChar[*z] ){ |
| 331 | break; |
| 332 | } |
| 333 | for(i=1; isIdChar[z[i]]; i++){} |
drh | 6a53534 | 2001-10-19 16:44:56 +0000 | [diff] [blame^] | 334 | *tokenType = sqliteKeywordCode((char*)z, i); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 335 | return i; |
| 336 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 337 | } |
| 338 | *tokenType = TK_ILLEGAL; |
| 339 | return 1; |
| 340 | } |
| 341 | |
| 342 | /* |
| 343 | ** Run the parser on the given SQL string. The parser structure is |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 344 | ** passed in. An SQLITE_ status code is returned. If an error occurs |
| 345 | ** and pzErrMsg!=NULL then an error message might be written into |
| 346 | ** memory obtained from malloc() and *pzErrMsg made to point to that |
| 347 | ** error message. Or maybe not. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 348 | */ |
| 349 | int sqliteRunParser(Parse *pParse, char *zSql, char **pzErrMsg){ |
| 350 | int nErr = 0; |
| 351 | int i; |
| 352 | void *pEngine; |
| 353 | int once = 1; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 354 | extern void *sqliteParserAlloc(void*(*)(int)); |
| 355 | extern void sqliteParserFree(void*, void(*)(void*)); |
drh | 338ea13 | 2001-02-11 16:56:24 +0000 | [diff] [blame] | 356 | extern int sqliteParser(void*, int, Token, Parse*); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 357 | |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 358 | pParse->db->flags &= ~SQLITE_Interrupt; |
| 359 | pParse->rc = SQLITE_OK; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 360 | i = 0; |
drh | 2282792 | 2000-06-06 17:27:05 +0000 | [diff] [blame] | 361 | sqliteParseInfoReset(pParse); |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 362 | pEngine = sqliteParserAlloc((void*(*)(int))malloc); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 363 | if( pEngine==0 ){ |
| 364 | sqliteSetString(pzErrMsg, "out of memory", 0); |
| 365 | return 1; |
| 366 | } |
drh | daffd0e | 2001-04-11 14:28:42 +0000 | [diff] [blame] | 367 | while( sqlite_malloc_failed==0 && nErr==0 && i>=0 && zSql[i]!=0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 368 | int tokenType; |
| 369 | |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 370 | if( (pParse->db->flags & SQLITE_Interrupt)!=0 ){ |
| 371 | pParse->rc = SQLITE_INTERRUPT; |
| 372 | sqliteSetString(pzErrMsg, "interrupt", 0); |
| 373 | break; |
| 374 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 375 | pParse->sLastToken.z = &zSql[i]; |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 376 | pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 377 | i += pParse->sLastToken.n; |
| 378 | if( once ){ |
| 379 | pParse->sFirstToken = pParse->sLastToken; |
| 380 | once = 0; |
| 381 | } |
| 382 | switch( tokenType ){ |
| 383 | case TK_SPACE: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 384 | case TK_COMMENT: { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 385 | break; |
| 386 | } |
| 387 | case TK_ILLEGAL: |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 388 | sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1, |
| 389 | pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 390 | nErr++; |
| 391 | break; |
| 392 | default: |
| 393 | sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse); |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 394 | if( pParse->zErrMsg && pParse->sErrToken.z ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 395 | sqliteSetNString(pzErrMsg, "near \"", -1, |
| 396 | pParse->sErrToken.z, pParse->sErrToken.n, |
| 397 | "\": ", -1, |
| 398 | pParse->zErrMsg, -1, |
| 399 | 0); |
| 400 | nErr++; |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 401 | sqliteFree(pParse->zErrMsg); |
| 402 | pParse->zErrMsg = 0; |
drh | ecdc753 | 2001-09-23 02:35:53 +0000 | [diff] [blame] | 403 | }else if( pParse->rc!=SQLITE_OK ){ |
| 404 | sqliteSetString(pzErrMsg, sqliteErrStr(pParse->rc), 0); |
| 405 | nErr++; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 406 | } |
| 407 | break; |
| 408 | } |
| 409 | } |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 410 | if( nErr==0 && (pParse->db->flags & SQLITE_Interrupt)==0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 411 | sqliteParser(pEngine, 0, pParse->sLastToken, pParse); |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 412 | if( pParse->zErrMsg && pParse->sErrToken.z ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 413 | sqliteSetNString(pzErrMsg, "near \"", -1, |
| 414 | pParse->sErrToken.z, pParse->sErrToken.n, |
| 415 | "\": ", -1, |
| 416 | pParse->zErrMsg, -1, |
| 417 | 0); |
| 418 | nErr++; |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 419 | sqliteFree(pParse->zErrMsg); |
| 420 | pParse->zErrMsg = 0; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 421 | } |
| 422 | } |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 423 | sqliteParserFree(pEngine, free); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 424 | if( pParse->zErrMsg ){ |
| 425 | if( pzErrMsg ){ |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 426 | sqliteFree(*pzErrMsg); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 427 | *pzErrMsg = pParse->zErrMsg; |
| 428 | }else{ |
| 429 | sqliteFree(pParse->zErrMsg); |
| 430 | } |
| 431 | if( !nErr ) nErr++; |
| 432 | } |
| 433 | if( pParse->pVdbe ){ |
| 434 | sqliteVdbeDelete(pParse->pVdbe); |
| 435 | pParse->pVdbe = 0; |
| 436 | } |
| 437 | if( pParse->pNewTable ){ |
| 438 | sqliteDeleteTable(pParse->db, pParse->pNewTable); |
| 439 | pParse->pNewTable = 0; |
| 440 | } |
drh | 2282792 | 2000-06-06 17:27:05 +0000 | [diff] [blame] | 441 | sqliteParseInfoReset(pParse); |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 442 | if( nErr>0 && pParse->rc==SQLITE_OK ){ |
| 443 | pParse->rc = SQLITE_ERROR; |
| 444 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 445 | return nErr; |
| 446 | } |