/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.89 2004/09/25 15:25:26 drh Exp $
*/
| 20 | #include "sqliteInt.h" |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 21 | #include "os.h" |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 22 | #include <ctype.h> |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 23 | #include <stdlib.h> |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 24 | |
| 25 | /* |
| 26 | ** All the keywords of the SQL language are stored as in a hash |
| 27 | ** table composed of instances of the following structure. |
| 28 | */ |
| 29 | typedef struct Keyword Keyword; |
| 30 | struct Keyword { |
| 31 | char *zName; /* The keyword name */ |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 32 | u8 tokenType; /* Token value for this keyword */ |
| 33 | u8 len; /* Length of this keyword */ |
| 34 | u8 iNext; /* Index in aKeywordTable[] of next with same hash */ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 35 | }; |
| 36 | |
| 37 | /* |
| 38 | ** These are the keywords |
| 39 | */ |
| 40 | static Keyword aKeywordTable[] = { |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 41 | { "ABORT", TK_ABORT, }, |
| 42 | { "AFTER", TK_AFTER, }, |
| 43 | { "ALL", TK_ALL, }, |
| 44 | { "AND", TK_AND, }, |
| 45 | { "AS", TK_AS, }, |
| 46 | { "ASC", TK_ASC, }, |
| 47 | { "ATTACH", TK_ATTACH, }, |
| 48 | { "BEFORE", TK_BEFORE, }, |
| 49 | { "BEGIN", TK_BEGIN, }, |
| 50 | { "BETWEEN", TK_BETWEEN, }, |
| 51 | { "BY", TK_BY, }, |
| 52 | { "CASCADE", TK_CASCADE, }, |
| 53 | { "CASE", TK_CASE, }, |
| 54 | { "CHECK", TK_CHECK, }, |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 55 | { "COLLATE", TK_COLLATE, }, |
| 56 | { "COMMIT", TK_COMMIT, }, |
| 57 | { "CONFLICT", TK_CONFLICT, }, |
| 58 | { "CONSTRAINT", TK_CONSTRAINT, }, |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 59 | { "CREATE", TK_CREATE, }, |
| 60 | { "CROSS", TK_JOIN_KW, }, |
| 61 | { "DATABASE", TK_DATABASE, }, |
| 62 | { "DEFAULT", TK_DEFAULT, }, |
| 63 | { "DEFERRED", TK_DEFERRED, }, |
| 64 | { "DEFERRABLE", TK_DEFERRABLE, }, |
| 65 | { "DELETE", TK_DELETE, }, |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 66 | { "DESC", TK_DESC, }, |
| 67 | { "DETACH", TK_DETACH, }, |
| 68 | { "DISTINCT", TK_DISTINCT, }, |
| 69 | { "DROP", TK_DROP, }, |
| 70 | { "END", TK_END, }, |
| 71 | { "EACH", TK_EACH, }, |
| 72 | { "ELSE", TK_ELSE, }, |
| 73 | { "EXCEPT", TK_EXCEPT, }, |
| 74 | { "EXPLAIN", TK_EXPLAIN, }, |
| 75 | { "FAIL", TK_FAIL, }, |
| 76 | { "FOR", TK_FOR, }, |
| 77 | { "FOREIGN", TK_FOREIGN, }, |
| 78 | { "FROM", TK_FROM, }, |
| 79 | { "FULL", TK_JOIN_KW, }, |
| 80 | { "GLOB", TK_GLOB, }, |
| 81 | { "GROUP", TK_GROUP, }, |
| 82 | { "HAVING", TK_HAVING, }, |
| 83 | { "IGNORE", TK_IGNORE, }, |
| 84 | { "IMMEDIATE", TK_IMMEDIATE, }, |
| 85 | { "IN", TK_IN, }, |
| 86 | { "INDEX", TK_INDEX, }, |
| 87 | { "INITIALLY", TK_INITIALLY, }, |
| 88 | { "INNER", TK_JOIN_KW, }, |
| 89 | { "INSERT", TK_INSERT, }, |
| 90 | { "INSTEAD", TK_INSTEAD, }, |
| 91 | { "INTERSECT", TK_INTERSECT, }, |
| 92 | { "INTO", TK_INTO, }, |
| 93 | { "IS", TK_IS, }, |
| 94 | { "ISNULL", TK_ISNULL, }, |
| 95 | { "JOIN", TK_JOIN, }, |
| 96 | { "KEY", TK_KEY, }, |
| 97 | { "LEFT", TK_JOIN_KW, }, |
| 98 | { "LIKE", TK_LIKE, }, |
| 99 | { "LIMIT", TK_LIMIT, }, |
| 100 | { "MATCH", TK_MATCH, }, |
| 101 | { "NATURAL", TK_JOIN_KW, }, |
| 102 | { "NOT", TK_NOT, }, |
| 103 | { "NOTNULL", TK_NOTNULL, }, |
| 104 | { "NULL", TK_NULL, }, |
| 105 | { "OF", TK_OF, }, |
| 106 | { "OFFSET", TK_OFFSET, }, |
| 107 | { "ON", TK_ON, }, |
| 108 | { "OR", TK_OR, }, |
| 109 | { "ORDER", TK_ORDER, }, |
| 110 | { "OUTER", TK_JOIN_KW, }, |
| 111 | { "PRAGMA", TK_PRAGMA, }, |
| 112 | { "PRIMARY", TK_PRIMARY, }, |
| 113 | { "RAISE", TK_RAISE, }, |
| 114 | { "REFERENCES", TK_REFERENCES, }, |
| 115 | { "REPLACE", TK_REPLACE, }, |
| 116 | { "RESTRICT", TK_RESTRICT, }, |
| 117 | { "RIGHT", TK_JOIN_KW, }, |
| 118 | { "ROLLBACK", TK_ROLLBACK, }, |
| 119 | { "ROW", TK_ROW, }, |
| 120 | { "SELECT", TK_SELECT, }, |
| 121 | { "SET", TK_SET, }, |
| 122 | { "STATEMENT", TK_STATEMENT, }, |
| 123 | { "TABLE", TK_TABLE, }, |
| 124 | { "TEMP", TK_TEMP, }, |
| 125 | { "TEMPORARY", TK_TEMP, }, |
| 126 | { "THEN", TK_THEN, }, |
| 127 | { "TRANSACTION", TK_TRANSACTION, }, |
| 128 | { "TRIGGER", TK_TRIGGER, }, |
| 129 | { "UNION", TK_UNION, }, |
| 130 | { "UNIQUE", TK_UNIQUE, }, |
| 131 | { "UPDATE", TK_UPDATE, }, |
| 132 | { "USING", TK_USING, }, |
| 133 | { "VACUUM", TK_VACUUM, }, |
| 134 | { "VALUES", TK_VALUES, }, |
| 135 | { "VIEW", TK_VIEW, }, |
| 136 | { "WHEN", TK_WHEN, }, |
| 137 | { "WHERE", TK_WHERE, }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 138 | }; |
| 139 | |
| 140 | /* |
| 141 | ** This is the hash table |
| 142 | */ |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 143 | #define KEY_HASH_SIZE 101 |
| 144 | static u8 aiHashTable[KEY_HASH_SIZE]; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 145 | |
| 146 | |
| 147 | /* |
| 148 | ** This function looks up an identifier to determine if it is a |
| 149 | ** keyword. If it is a keyword, the token code of that keyword is |
| 150 | ** returned. If the input is not a keyword, TK_ID is returned. |
| 151 | */ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 152 | int sqlite3KeywordCode(const char *z, int n){ |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 153 | int h, i; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 154 | Keyword *p; |
drh | 93a5c6b | 2003-12-23 02:17:35 +0000 | [diff] [blame] | 155 | static char needInit = 1; |
| 156 | if( needInit ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 157 | /* Initialize the keyword hash table */ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 158 | sqlite3OsEnterMutex(); |
drh | 93a5c6b | 2003-12-23 02:17:35 +0000 | [diff] [blame] | 159 | if( needInit ){ |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 160 | int nk; |
| 161 | nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 162 | for(i=0, p=aKeywordTable; i<nk; i++, p++){ |
| 163 | const char *zName = p->zName; |
| 164 | int len = p->len = strlen(zName); |
| 165 | h = sqlite3HashNoCase(zName, len) % KEY_HASH_SIZE; |
| 166 | p->iNext = aiHashTable[h]; |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 167 | aiHashTable[h] = i+1; |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 168 | } |
drh | 93a5c6b | 2003-12-23 02:17:35 +0000 | [diff] [blame] | 169 | needInit = 0; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 170 | } |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 171 | sqlite3OsLeaveMutex(); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 172 | } |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 173 | h = sqlite3HashNoCase(z, n) % KEY_HASH_SIZE; |
drh | ba21256 | 2004-01-08 02:17:31 +0000 | [diff] [blame] | 174 | for(i=aiHashTable[h]; i; i=p->iNext){ |
| 175 | p = &aKeywordTable[i-1]; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 176 | if( p->len==n && sqlite3StrNICmp(p->zName, z, n)==0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 177 | return p->tokenType; |
| 178 | } |
| 179 | } |
| 180 | return TK_ID; |
| 181 | } |
| 182 | |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 183 | |
/*
** isIdChar[] covers the characters 0x30 through 0x7f.  The entry for
** character X is isIdChar[X-0x30].  It is 1 if X can be used as part
** of an identifier (digits, ASCII letters and "_") and 0 if it cannot.
**
** Characters below 0x30 are never identifier characters.  Characters
** with the high-order bit set (X&0x80==0x80) are always identifier
** characters and are not looked up in the table.  The latter rule
** means that any sequence of UTF-8 characters, or of characters taken
** from an extended ISO8859 character set, can form an identifier.
**
** The IdChar(C) macro applies these rules.  As a side effect it stores
** C in a variable named "c", which must be declared with an integer
** type in every scope where the macro is used.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};

#define IdChar(C)  (((c=(C))&0x80)!=0 || (c>0x2f && isIdChar[c-0x30]))
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 208 | |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 209 | /* |
drh | 61b487d | 2003-09-12 02:08:14 +0000 | [diff] [blame] | 210 | ** Return the length of the token that begins at z[0]. |
| 211 | ** Store the token type in *tokenType before returning. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 212 | */ |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 213 | static int sqliteGetToken(const unsigned char *z, int *tokenType){ |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 214 | int i, c; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 215 | switch( *z ){ |
drh | 30cab80 | 2000-08-09 17:17:25 +0000 | [diff] [blame] | 216 | case ' ': case '\t': case '\n': case '\f': case '\r': { |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 217 | for(i=1; isspace(z[i]); i++){} |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 218 | *tokenType = TK_SPACE; |
| 219 | return i; |
| 220 | } |
| 221 | case '-': { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 222 | if( z[1]=='-' ){ |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 223 | for(i=2; (c=z[i])!=0 && c!='\n'; i++){} |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 224 | *tokenType = TK_COMMENT; |
| 225 | return i; |
| 226 | } |
| 227 | *tokenType = TK_MINUS; |
| 228 | return 1; |
| 229 | } |
| 230 | case '(': { |
drh | dab3518 | 2003-09-27 13:39:38 +0000 | [diff] [blame] | 231 | *tokenType = TK_LP; |
| 232 | return 1; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 233 | } |
| 234 | case ')': { |
| 235 | *tokenType = TK_RP; |
| 236 | return 1; |
| 237 | } |
| 238 | case ';': { |
| 239 | *tokenType = TK_SEMI; |
| 240 | return 1; |
| 241 | } |
| 242 | case '+': { |
| 243 | *tokenType = TK_PLUS; |
| 244 | return 1; |
| 245 | } |
| 246 | case '*': { |
| 247 | *tokenType = TK_STAR; |
| 248 | return 1; |
| 249 | } |
| 250 | case '/': { |
drh | 66105a8 | 2002-08-27 14:28:29 +0000 | [diff] [blame] | 251 | if( z[1]!='*' || z[2]==0 ){ |
| 252 | *tokenType = TK_SLASH; |
| 253 | return 1; |
| 254 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 255 | for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} |
| 256 | if( c ) i++; |
drh | 66105a8 | 2002-08-27 14:28:29 +0000 | [diff] [blame] | 257 | *tokenType = TK_COMMENT; |
| 258 | return i; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 259 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 260 | case '%': { |
| 261 | *tokenType = TK_REM; |
| 262 | return 1; |
| 263 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 264 | case '=': { |
| 265 | *tokenType = TK_EQ; |
| 266 | return 1 + (z[1]=='='); |
| 267 | } |
| 268 | case '<': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 269 | if( (c=z[1])=='=' ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 270 | *tokenType = TK_LE; |
| 271 | return 2; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 272 | }else if( c=='>' ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 273 | *tokenType = TK_NE; |
| 274 | return 2; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 275 | }else if( c=='<' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 276 | *tokenType = TK_LSHIFT; |
| 277 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 278 | }else{ |
| 279 | *tokenType = TK_LT; |
| 280 | return 1; |
| 281 | } |
| 282 | } |
| 283 | case '>': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 284 | if( (c=z[1])=='=' ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 285 | *tokenType = TK_GE; |
| 286 | return 2; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 287 | }else if( c=='>' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 288 | *tokenType = TK_RSHIFT; |
| 289 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 290 | }else{ |
| 291 | *tokenType = TK_GT; |
| 292 | return 1; |
| 293 | } |
| 294 | } |
| 295 | case '!': { |
| 296 | if( z[1]!='=' ){ |
| 297 | *tokenType = TK_ILLEGAL; |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 298 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 299 | }else{ |
| 300 | *tokenType = TK_NE; |
| 301 | return 2; |
| 302 | } |
| 303 | } |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 304 | case '|': { |
| 305 | if( z[1]!='|' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 306 | *tokenType = TK_BITOR; |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 307 | return 1; |
| 308 | }else{ |
| 309 | *tokenType = TK_CONCAT; |
| 310 | return 2; |
| 311 | } |
| 312 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 313 | case ',': { |
| 314 | *tokenType = TK_COMMA; |
| 315 | return 1; |
| 316 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 317 | case '&': { |
| 318 | *tokenType = TK_BITAND; |
| 319 | return 1; |
| 320 | } |
| 321 | case '~': { |
| 322 | *tokenType = TK_BITNOT; |
| 323 | return 1; |
| 324 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 325 | case '\'': case '"': { |
| 326 | int delim = z[0]; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 327 | for(i=1; (c=z[i])!=0; i++){ |
| 328 | if( c==delim ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 329 | if( z[i+1]==delim ){ |
| 330 | i++; |
| 331 | }else{ |
| 332 | break; |
| 333 | } |
| 334 | } |
| 335 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 336 | if( c ) i++; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 337 | *tokenType = TK_STRING; |
| 338 | return i; |
| 339 | } |
| 340 | case '.': { |
drh | bb07e9a | 2003-04-16 02:17:35 +0000 | [diff] [blame] | 341 | *tokenType = TK_DOT; |
| 342 | return 1; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 343 | } |
| 344 | case '0': case '1': case '2': case '3': case '4': |
| 345 | case '5': case '6': case '7': case '8': case '9': { |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 346 | *tokenType = TK_INTEGER; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 347 | for(i=1; isdigit(z[i]); i++){} |
drh | bb07e9a | 2003-04-16 02:17:35 +0000 | [diff] [blame] | 348 | if( z[i]=='.' && isdigit(z[i+1]) ){ |
| 349 | i += 2; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 350 | while( isdigit(z[i]) ){ i++; } |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 351 | *tokenType = TK_FLOAT; |
| 352 | } |
| 353 | if( (z[i]=='e' || z[i]=='E') && |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 354 | ( isdigit(z[i+1]) |
| 355 | || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
| 356 | ) |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 357 | ){ |
| 358 | i += 2; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 359 | while( isdigit(z[i]) ){ i++; } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 360 | *tokenType = TK_FLOAT; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 361 | } |
| 362 | return i; |
| 363 | } |
drh | 2f4392f | 2002-02-14 21:42:51 +0000 | [diff] [blame] | 364 | case '[': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 365 | for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} |
drh | 2f4392f | 2002-02-14 21:42:51 +0000 | [diff] [blame] | 366 | *tokenType = TK_ID; |
| 367 | return i; |
| 368 | } |
drh | 7c972de | 2003-09-06 22:18:07 +0000 | [diff] [blame] | 369 | case '?': { |
drh | 5045789 | 2003-09-06 01:10:47 +0000 | [diff] [blame] | 370 | *tokenType = TK_VARIABLE; |
drh | fa6bc00 | 2004-09-07 16:19:52 +0000 | [diff] [blame] | 371 | for(i=1; isdigit(z[i]); i++){} |
| 372 | return i; |
drh | 5045789 | 2003-09-06 01:10:47 +0000 | [diff] [blame] | 373 | } |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 374 | case ':': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 375 | for(i=1; IdChar(z[i]); i++){} |
drh | 2c6674c | 2004-08-25 04:07:01 +0000 | [diff] [blame] | 376 | *tokenType = i>1 ? TK_VARIABLE : TK_ILLEGAL; |
| 377 | return i; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 378 | } |
| 379 | case '$': { |
drh | 9d74b4c | 2004-08-24 15:23:34 +0000 | [diff] [blame] | 380 | *tokenType = TK_VARIABLE; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 381 | if( z[1]=='{' ){ |
| 382 | int nBrace = 1; |
| 383 | for(i=2; (c=z[i])!=0 && nBrace; i++){ |
| 384 | if( c=='{' ){ |
| 385 | nBrace++; |
| 386 | }else if( c=='}' ){ |
| 387 | nBrace--; |
| 388 | } |
| 389 | } |
drh | 9d74b4c | 2004-08-24 15:23:34 +0000 | [diff] [blame] | 390 | if( c==0 ) *tokenType = TK_ILLEGAL; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 391 | }else{ |
| 392 | int n = 0; |
| 393 | for(i=1; (c=z[i])!=0; i++){ |
| 394 | if( isalnum(c) || c=='_' ){ |
| 395 | n++; |
| 396 | }else if( c=='(' && n>0 ){ |
| 397 | do{ |
| 398 | i++; |
| 399 | }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); |
| 400 | if( c==')' ){ |
| 401 | i++; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 402 | }else{ |
| 403 | *tokenType = TK_ILLEGAL; |
| 404 | } |
| 405 | break; |
| 406 | }else if( c==':' && z[i+1]==':' ){ |
| 407 | i++; |
| 408 | }else{ |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 409 | break; |
| 410 | } |
| 411 | } |
drh | 9d74b4c | 2004-08-24 15:23:34 +0000 | [diff] [blame] | 412 | if( n==0 ) *tokenType = TK_ILLEGAL; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 413 | } |
| 414 | return i; |
| 415 | } |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 416 | case 'x': case 'X': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 417 | if( (c=z[1])=='\'' || c=='"' ){ |
| 418 | int delim = c; |
danielk1977 | 3fd0a73 | 2004-05-27 13:35:19 +0000 | [diff] [blame] | 419 | *tokenType = TK_BLOB; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 420 | for(i=2; (c=z[i])!=0; i++){ |
| 421 | if( c==delim ){ |
danielk1977 | 3fd0a73 | 2004-05-27 13:35:19 +0000 | [diff] [blame] | 422 | if( i%2 ) *tokenType = TK_ILLEGAL; |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 423 | break; |
| 424 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 425 | if( !isxdigit(c) ){ |
danielk1977 | 3fd0a73 | 2004-05-27 13:35:19 +0000 | [diff] [blame] | 426 | *tokenType = TK_ILLEGAL; |
| 427 | return i; |
| 428 | } |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 429 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 430 | if( c ) i++; |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 431 | return i; |
| 432 | } |
| 433 | /* Otherwise fall through to the next case */ |
| 434 | } |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 435 | default: { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 436 | if( !IdChar(*z) ){ |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 437 | break; |
| 438 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 439 | for(i=1; IdChar(z[i]); i++){} |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 440 | *tokenType = sqlite3KeywordCode((char*)z, i); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 441 | return i; |
| 442 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 443 | } |
| 444 | *tokenType = TK_ILLEGAL; |
| 445 | return 1; |
| 446 | } |
| 447 | |
| 448 | /* |
| 449 | ** Run the parser on the given SQL string. The parser structure is |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 450 | ** passed in. An SQLITE_ status code is returned. If an error occurs |
| 451 | ** and pzErrMsg!=NULL then an error message might be written into |
| 452 | ** memory obtained from malloc() and *pzErrMsg made to point to that |
| 453 | ** error message. Or maybe not. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 454 | */ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 455 | int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 456 | int nErr = 0; |
| 457 | int i; |
| 458 | void *pEngine; |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 459 | int tokenType; |
| 460 | int lastTokenParsed = -1; |
drh | 9bb575f | 2004-09-06 17:24:11 +0000 | [diff] [blame] | 461 | sqlite3 *db = pParse->db; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 462 | extern void *sqlite3ParserAlloc(void*(*)(int)); |
| 463 | extern void sqlite3ParserFree(void*, void(*)(void*)); |
| 464 | extern int sqlite3Parser(void*, int, Token, Parse*); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 465 | |
drh | 6d4abfb | 2001-10-22 02:58:08 +0000 | [diff] [blame] | 466 | db->flags &= ~SQLITE_Interrupt; |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 467 | pParse->rc = SQLITE_OK; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 468 | i = 0; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 469 | pEngine = sqlite3ParserAlloc((void*(*)(int))malloc); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 470 | if( pEngine==0 ){ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 471 | sqlite3SetString(pzErrMsg, "out of memory", (char*)0); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 472 | return 1; |
| 473 | } |
drh | fa6bc00 | 2004-09-07 16:19:52 +0000 | [diff] [blame] | 474 | assert( pParse->sLastToken.dyn==0 ); |
| 475 | assert( pParse->pNewTable==0 ); |
| 476 | assert( pParse->pNewTrigger==0 ); |
| 477 | assert( pParse->nVar==0 ); |
| 478 | assert( pParse->nVarExpr==0 ); |
| 479 | assert( pParse->nVarExprAlloc==0 ); |
| 480 | assert( pParse->apVarExpr==0 ); |
drh | 3f7d4e4 | 2004-07-24 14:35:58 +0000 | [diff] [blame] | 481 | pParse->zTail = pParse->zSql = zSql; |
danielk1977 | 6f8a503 | 2004-05-10 10:34:51 +0000 | [diff] [blame] | 482 | while( sqlite3_malloc_failed==0 && zSql[i]!=0 ){ |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 483 | assert( i>=0 ); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 484 | pParse->sLastToken.z = &zSql[i]; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 485 | assert( pParse->sLastToken.dyn==0 ); |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 486 | pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 487 | i += pParse->sLastToken.n; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 488 | switch( tokenType ){ |
| 489 | case TK_SPACE: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 490 | case TK_COMMENT: { |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 491 | if( (db->flags & SQLITE_Interrupt)!=0 ){ |
| 492 | pParse->rc = SQLITE_INTERRUPT; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 493 | sqlite3SetString(pzErrMsg, "interrupt", (char*)0); |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 494 | goto abort_parse; |
| 495 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 496 | break; |
| 497 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 498 | case TK_ILLEGAL: { |
drh | ae29ffb | 2004-09-25 14:39:18 +0000 | [diff] [blame] | 499 | if( pzErrMsg ){ |
| 500 | sqliteFree(*pzErrMsg); |
| 501 | *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"", |
| 502 | &pParse->sLastToken); |
| 503 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 504 | nErr++; |
drh | caec2f1 | 2003-01-07 02:47:47 +0000 | [diff] [blame] | 505 | goto abort_parse; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 506 | } |
drh | 326dce7 | 2003-01-29 14:06:07 +0000 | [diff] [blame] | 507 | case TK_SEMI: { |
| 508 | pParse->zTail = &zSql[i]; |
| 509 | /* Fall thru into the default case */ |
| 510 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 511 | default: { |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 512 | sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 513 | lastTokenParsed = tokenType; |
| 514 | if( pParse->rc!=SQLITE_OK ){ |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 515 | goto abort_parse; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 516 | } |
| 517 | break; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 518 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 519 | } |
| 520 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 521 | abort_parse: |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 522 | if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ |
| 523 | if( lastTokenParsed!=TK_SEMI ){ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 524 | sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); |
drh | 326dce7 | 2003-01-29 14:06:07 +0000 | [diff] [blame] | 525 | pParse->zTail = &zSql[i]; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 526 | } |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 527 | sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 528 | } |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 529 | sqlite3ParserFree(pEngine, free); |
drh | 71c697e | 2004-08-08 23:39:19 +0000 | [diff] [blame] | 530 | if( sqlite3_malloc_failed ){ |
| 531 | pParse->rc = SQLITE_NOMEM; |
| 532 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 533 | if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ |
danielk1977 | f20b21c | 2004-05-31 23:56:42 +0000 | [diff] [blame] | 534 | sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), |
drh | 4174398 | 2003-12-06 21:43:55 +0000 | [diff] [blame] | 535 | (char*)0); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 536 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 537 | if( pParse->zErrMsg ){ |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 538 | if( pzErrMsg && *pzErrMsg==0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 539 | *pzErrMsg = pParse->zErrMsg; |
| 540 | }else{ |
| 541 | sqliteFree(pParse->zErrMsg); |
| 542 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 543 | pParse->zErrMsg = 0; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 544 | if( !nErr ) nErr++; |
| 545 | } |
drh | 826fb5a | 2004-02-14 23:59:57 +0000 | [diff] [blame] | 546 | if( pParse->pVdbe && pParse->nErr>0 ){ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 547 | sqlite3VdbeDelete(pParse->pVdbe); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 548 | pParse->pVdbe = 0; |
| 549 | } |
drh | fa6bc00 | 2004-09-07 16:19:52 +0000 | [diff] [blame] | 550 | sqlite3DeleteTable(pParse->db, pParse->pNewTable); |
| 551 | sqlite3DeleteTrigger(pParse->pNewTrigger); |
| 552 | sqliteFree(pParse->apVarExpr); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 553 | if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 554 | pParse->rc = SQLITE_ERROR; |
| 555 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 556 | return nErr; |
| 557 | } |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 558 | |
/*
** Token types used by the sqlite3_complete() routine.  See the header
** comments on that procedure for additional information.  These are
** small consecutive integers starting at zero.
*/
#define tkEXPLAIN   0
#define tkCREATE    1
#define tkTEMP      2
#define tkTRIGGER   3
#define tkEND       4
#define tkSEMI      5
#define tkWS        6
#define tkOTHER     7
| 571 | |
/*
** Return TRUE if the given SQL string ends in a semicolon.
**
** Special handling is required for CREATE TRIGGER statements.
** Whenever the CREATE TRIGGER keywords are seen, the statement
** must end with ";END;".
**
** This implementation uses a state machine with 7 states:
**
**   (0) START     At the beginning or end of an SQL statement.  This routine
**                 returns 1 if it ends in the START state and 0 if it ends
**                 in any other state.
**
**   (1) EXPLAIN   The keyword EXPLAIN has been seen at the beginning of
**                 a statement.
**
**   (2) CREATE    The keyword CREATE has been seen at the beginning of a
**                 statement, possibly preceded by EXPLAIN and/or followed by
**                 TEMP or TEMPORARY
**
**   (3) NORMAL    We are in the middle of statement which ends with a single
**                 semicolon.
**
**   (4) TRIGGER   We are in the middle of a trigger definition that must be
**                 ended by a semicolon, the keyword END, and another semicolon.
**
**   (5) SEMI      We've seen the first semicolon in the ";END;" that occurs at
**                 the end of a trigger definition.
**
**   (6) END       We've seen the ";END" of the ";END;" that occurs at the end
**                 of a trigger definition.
**
** Transitions between states above are determined by tokens extracted
** from the input.  The following tokens are significant:
**
**   (0) tkEXPLAIN   The "explain" keyword.
**   (1) tkCREATE    The "create" keyword.
**   (2) tkTEMP      The "temp" or "temporary" keyword.
**   (3) tkTRIGGER   The "trigger" keyword.
**   (4) tkEND       The "end" keyword.
**   (5) tkSEMI      A semicolon.
**   (6) tkWS        Whitespace
**   (7) tkOTHER     Any other SQL token.
**
** Whitespace never causes a state transition and is always ignored.
*/
danielk1977 | 6f8a503 | 2004-05-10 10:34:51 +0000 | [diff] [blame] | 618 | int sqlite3_complete(const char *zSql){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 619 | u8 state = 0; /* Current state, using numbers defined in header comment */ |
| 620 | u8 token; /* Value of the next token */ |
| 621 | |
| 622 | /* The following matrix defines the transition from one state to another |
| 623 | ** according to what token is seen. trans[state][token] returns the |
| 624 | ** next state. |
| 625 | */ |
| 626 | static const u8 trans[7][8] = { |
drh | e1e38c4 | 2003-05-04 18:30:59 +0000 | [diff] [blame] | 627 | /* Token: */ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 628 | /* State: ** EXPLAIN CREATE TEMP TRIGGER END SEMI WS OTHER */ |
| 629 | /* 0 START: */ { 1, 2, 3, 3, 3, 0, 0, 3, }, |
| 630 | /* 1 EXPLAIN: */ { 3, 2, 3, 3, 3, 0, 1, 3, }, |
| 631 | /* 2 CREATE: */ { 3, 3, 2, 4, 3, 0, 2, 3, }, |
| 632 | /* 3 NORMAL: */ { 3, 3, 3, 3, 3, 0, 3, 3, }, |
| 633 | /* 4 TRIGGER: */ { 4, 4, 4, 4, 4, 5, 4, 4, }, |
| 634 | /* 5 SEMI: */ { 4, 4, 4, 4, 6, 5, 5, 4, }, |
| 635 | /* 6 END: */ { 4, 4, 4, 4, 4, 0, 6, 4, }, |
| 636 | }; |
| 637 | |
| 638 | while( *zSql ){ |
| 639 | switch( *zSql ){ |
| 640 | case ';': { /* A semicolon */ |
| 641 | token = tkSEMI; |
| 642 | break; |
| 643 | } |
| 644 | case ' ': |
| 645 | case '\r': |
| 646 | case '\t': |
| 647 | case '\n': |
| 648 | case '\f': { /* White space is ignored */ |
| 649 | token = tkWS; |
| 650 | break; |
| 651 | } |
| 652 | case '/': { /* C-style comments */ |
| 653 | if( zSql[1]!='*' ){ |
| 654 | token = tkOTHER; |
| 655 | break; |
| 656 | } |
| 657 | zSql += 2; |
| 658 | while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } |
| 659 | if( zSql[0]==0 ) return 0; |
| 660 | zSql++; |
| 661 | token = tkWS; |
| 662 | break; |
| 663 | } |
| 664 | case '-': { /* SQL-style comments from "--" to end of line */ |
| 665 | if( zSql[1]!='-' ){ |
| 666 | token = tkOTHER; |
| 667 | break; |
| 668 | } |
| 669 | while( *zSql && *zSql!='\n' ){ zSql++; } |
| 670 | if( *zSql==0 ) return state==0; |
| 671 | token = tkWS; |
| 672 | break; |
| 673 | } |
| 674 | case '[': { /* Microsoft-style identifiers in [...] */ |
| 675 | zSql++; |
| 676 | while( *zSql && *zSql!=']' ){ zSql++; } |
| 677 | if( *zSql==0 ) return 0; |
| 678 | token = tkOTHER; |
| 679 | break; |
| 680 | } |
| 681 | case '"': /* single- and double-quoted strings */ |
| 682 | case '\'': { |
| 683 | int c = *zSql; |
| 684 | zSql++; |
| 685 | while( *zSql && *zSql!=c ){ zSql++; } |
| 686 | if( *zSql==0 ) return 0; |
| 687 | token = tkOTHER; |
| 688 | break; |
| 689 | } |
| 690 | default: { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 691 | int c; |
| 692 | if( IdChar((u8)*zSql) ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 693 | /* Keywords and unquoted identifiers */ |
| 694 | int nId; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame^] | 695 | for(nId=1; IdChar(zSql[nId]); nId++){} |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 696 | switch( *zSql ){ |
| 697 | case 'c': case 'C': { |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 698 | if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 699 | token = tkCREATE; |
| 700 | }else{ |
| 701 | token = tkOTHER; |
| 702 | } |
| 703 | break; |
| 704 | } |
| 705 | case 't': case 'T': { |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 706 | if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 707 | token = tkTRIGGER; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 708 | }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 709 | token = tkTEMP; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 710 | }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 711 | token = tkTEMP; |
| 712 | }else{ |
| 713 | token = tkOTHER; |
| 714 | } |
| 715 | break; |
| 716 | } |
| 717 | case 'e': case 'E': { |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 718 | if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 719 | token = tkEND; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 720 | }else if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 721 | token = tkEXPLAIN; |
| 722 | }else{ |
| 723 | token = tkOTHER; |
| 724 | } |
| 725 | break; |
| 726 | } |
| 727 | default: { |
| 728 | token = tkOTHER; |
| 729 | break; |
| 730 | } |
| 731 | } |
| 732 | zSql += nId-1; |
| 733 | }else{ |
| 734 | /* Operators and special symbols */ |
| 735 | token = tkOTHER; |
| 736 | } |
| 737 | break; |
| 738 | } |
| 739 | } |
| 740 | state = trans[state][token]; |
| 741 | zSql++; |
| 742 | } |
| 743 | return state==0; |
| 744 | } |
danielk1977 | 61de0d1 | 2004-05-27 23:56:16 +0000 | [diff] [blame] | 745 | |
| 746 | /* |
| 747 | ** This routine is the same as the sqlite3_complete() routine described |
| 748 | ** above, except that the parameter is required to be UTF-16 encoded, not |
| 749 | ** UTF-8. |
| 750 | */ |
| 751 | int sqlite3_complete16(const void *zSql){ |
danielk1977 | bfd6cce | 2004-06-18 04:24:54 +0000 | [diff] [blame] | 752 | sqlite3_value *pVal; |
danielk1977 | 5314c4d | 2004-06-18 06:02:35 +0000 | [diff] [blame] | 753 | char const *zSql8; |
danielk1977 | bfd6cce | 2004-06-18 04:24:54 +0000 | [diff] [blame] | 754 | int rc = 0; |
| 755 | |
| 756 | pVal = sqlite3ValueNew(); |
| 757 | sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); |
| 758 | zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); |
| 759 | if( zSql8 ){ |
| 760 | rc = sqlite3_complete(zSql8); |
danielk1977 | bfd6cce | 2004-06-18 04:24:54 +0000 | [diff] [blame] | 761 | } |
| 762 | sqlite3ValueFree(pVal); |
danielk1977 | 61de0d1 | 2004-05-27 23:56:16 +0000 | [diff] [blame] | 763 | return rc; |
| 764 | } |