drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 1 | /* |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 2 | ** 2001 September 15 |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 3 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 4 | ** The author disclaims copyright to this source code. In place of |
| 5 | ** a legal notice, here is a blessing: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 6 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 7 | ** May you do good and not evil. |
| 8 | ** May you find forgiveness for yourself and forgive others. |
| 9 | ** May you share freely, never taking more than you give. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 10 | ** |
| 11 | ************************************************************************* |
| 12 | ** A tokenizer for SQL |
| 13 | ** |
| 14 | ** This file contains C code that splits an SQL input string up into |
| 15 | ** individual tokens and sends those tokens one-by-one over to the |
| 16 | ** parser for analysis. |
| 17 | ** |
drh | a76b5df | 2002-02-23 02:32:10 +0000 | [diff] [blame^] | 18 | ** $Id: tokenize.c,v 1.38 2002/02/23 02:32:10 drh Exp $ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 19 | */ |
| 20 | #include "sqliteInt.h" |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 21 | #include "os.h" |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 22 | #include <ctype.h> |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 23 | #include <stdlib.h> |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 24 | |
/*
** Every SQL keyword is described by one instance of the structure
** below.  The instances are linked together into the buckets of a
** hash table so that an identifier can be classified quickly.
*/
typedef struct Keyword {
  char *zName;             /* Text of the keyword */
  int len;                 /* Length of zName in characters */
  int tokenType;           /* Token code reported for this keyword */
  struct Keyword *pNext;   /* Next keyword in the same hash bucket */
} Keyword;
| 36 | |
| 37 | /* |
| 38 | ** These are the keywords |
| 39 | */ |
| 40 | static Keyword aKeywordTable[] = { |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 41 | { "ABORT", 0, TK_ABORT, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 42 | { "ALL", 0, TK_ALL, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 43 | { "AND", 0, TK_AND, 0 }, |
| 44 | { "AS", 0, TK_AS, 0 }, |
| 45 | { "ASC", 0, TK_ASC, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 46 | { "BEGIN", 0, TK_BEGIN, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 47 | { "BETWEEN", 0, TK_BETWEEN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 48 | { "BY", 0, TK_BY, 0 }, |
| 49 | { "CHECK", 0, TK_CHECK, 0 }, |
drh | f57b14a | 2001-09-14 18:54:08 +0000 | [diff] [blame] | 50 | { "CLUSTER", 0, TK_CLUSTER, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 51 | { "COMMIT", 0, TK_COMMIT, 0 }, |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 52 | { "CONFLICT", 0, TK_CONFLICT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 53 | { "CONSTRAINT", 0, TK_CONSTRAINT, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 54 | { "COPY", 0, TK_COPY, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 55 | { "CREATE", 0, TK_CREATE, 0 }, |
| 56 | { "DEFAULT", 0, TK_DEFAULT, 0 }, |
| 57 | { "DELETE", 0, TK_DELETE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 58 | { "DELIMITERS", 0, TK_DELIMITERS, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 59 | { "DESC", 0, TK_DESC, 0 }, |
drh | efb7251 | 2000-05-31 20:00:52 +0000 | [diff] [blame] | 60 | { "DISTINCT", 0, TK_DISTINCT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 61 | { "DROP", 0, TK_DROP, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 62 | { "END", 0, TK_END, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 63 | { "EXCEPT", 0, TK_EXCEPT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 64 | { "EXPLAIN", 0, TK_EXPLAIN, 0 }, |
drh | 1c92853 | 2002-01-31 15:54:21 +0000 | [diff] [blame] | 65 | { "FAIL", 0, TK_FAIL, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 66 | { "FROM", 0, TK_FROM, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 67 | { "GLOB", 0, TK_GLOB, 0 }, |
drh | 2282792 | 2000-06-06 17:27:05 +0000 | [diff] [blame] | 68 | { "GROUP", 0, TK_GROUP, 0 }, |
| 69 | { "HAVING", 0, TK_HAVING, 0 }, |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 70 | { "IGNORE", 0, TK_IGNORE, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 71 | { "IN", 0, TK_IN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 72 | { "INDEX", 0, TK_INDEX, 0 }, |
| 73 | { "INSERT", 0, TK_INSERT, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 74 | { "INTERSECT", 0, TK_INTERSECT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 75 | { "INTO", 0, TK_INTO, 0 }, |
| 76 | { "IS", 0, TK_IS, 0 }, |
| 77 | { "ISNULL", 0, TK_ISNULL, 0 }, |
| 78 | { "KEY", 0, TK_KEY, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 79 | { "LIKE", 0, TK_LIKE, 0 }, |
drh | 9bbca4c | 2001-11-06 04:00:18 +0000 | [diff] [blame] | 80 | { "LIMIT", 0, TK_LIMIT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 81 | { "NOT", 0, TK_NOT, 0 }, |
| 82 | { "NOTNULL", 0, TK_NOTNULL, 0 }, |
| 83 | { "NULL", 0, TK_NULL, 0 }, |
drh | 9bbca4c | 2001-11-06 04:00:18 +0000 | [diff] [blame] | 84 | { "OFFSET", 0, TK_OFFSET, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 85 | { "ON", 0, TK_ON, 0 }, |
| 86 | { "OR", 0, TK_OR, 0 }, |
| 87 | { "ORDER", 0, TK_ORDER, 0 }, |
drh | f57b14a | 2001-09-14 18:54:08 +0000 | [diff] [blame] | 88 | { "PRAGMA", 0, TK_PRAGMA, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 89 | { "PRIMARY", 0, TK_PRIMARY, 0 }, |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 90 | { "REPLACE", 0, TK_REPLACE, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 91 | { "ROLLBACK", 0, TK_ROLLBACK, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 92 | { "SELECT", 0, TK_SELECT, 0 }, |
| 93 | { "SET", 0, TK_SET, 0 }, |
| 94 | { "TABLE", 0, TK_TABLE, 0 }, |
drh | f57b339 | 2001-10-08 13:22:32 +0000 | [diff] [blame] | 95 | { "TEMP", 0, TK_TEMP, 0 }, |
| 96 | { "TEMPORARY", 0, TK_TEMP, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 97 | { "TRANSACTION", 0, TK_TRANSACTION, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 98 | { "UNION", 0, TK_UNION, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 99 | { "UNIQUE", 0, TK_UNIQUE, 0 }, |
| 100 | { "UPDATE", 0, TK_UPDATE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 101 | { "USING", 0, TK_USING, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 102 | { "VACUUM", 0, TK_VACUUM, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 103 | { "VALUES", 0, TK_VALUES, 0 }, |
drh | a76b5df | 2002-02-23 02:32:10 +0000 | [diff] [blame^] | 104 | { "VIEW", 0, TK_VIEW, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 105 | { "WHERE", 0, TK_WHERE, 0 }, |
| 106 | }; |
| 107 | |
| 108 | /* |
| 109 | ** This is the hash table |
| 110 | */ |
drh | daffd0e | 2001-04-11 14:28:42 +0000 | [diff] [blame] | 111 | #define KEY_HASH_SIZE 71 |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 112 | static Keyword *apHashTable[KEY_HASH_SIZE]; |
| 113 | |
| 114 | |
| 115 | /* |
| 116 | ** This function looks up an identifier to determine if it is a |
| 117 | ** keyword. If it is a keyword, the token code of that keyword is |
| 118 | ** returned. If the input is not a keyword, TK_ID is returned. |
| 119 | */ |
drh | 17f7193 | 2002-02-21 12:01:27 +0000 | [diff] [blame] | 120 | int sqliteKeywordCode(const char *z, int n){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 121 | int h; |
| 122 | Keyword *p; |
| 123 | if( aKeywordTable[0].len==0 ){ |
| 124 | /* Initialize the keyword hash table */ |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 125 | sqliteOsEnterMutex(); |
| 126 | if( aKeywordTable[0].len==0 ){ |
| 127 | int i; |
| 128 | int n; |
| 129 | n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); |
| 130 | for(i=0; i<n; i++){ |
| 131 | aKeywordTable[i].len = strlen(aKeywordTable[i].zName); |
| 132 | h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len); |
| 133 | h %= KEY_HASH_SIZE; |
| 134 | aKeywordTable[i].pNext = apHashTable[h]; |
| 135 | apHashTable[h] = &aKeywordTable[i]; |
| 136 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 137 | } |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 138 | sqliteOsLeaveMutex(); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 139 | } |
| 140 | h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE; |
| 141 | for(p=apHashTable[h]; p; p=p->pNext){ |
| 142 | if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){ |
| 143 | return p->tokenType; |
| 144 | } |
| 145 | } |
| 146 | return TK_ID; |
| 147 | } |
| 148 | |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 149 | |
/*
** Lookup table indexed by character value: isIdChar[X] is 1 when X may
** appear in an identifier and 0 when it may not.
**
** The characters accepted are the ASCII letters, the ASCII digits, the
** underscore, and every value with the high-order bit set (0x80 through
** 0xFF).  Accepting all high-bit values means that any UTF-8 sequence,
** or any character from an extended ISO8859 character set, can be part
** of an identifier.
*/
static const char isIdChar[] = {
  /* 0x00 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  /* 0x40 */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  /* 0x60 */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  /* 0x80 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xB0 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xF0 */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
| 179 | |
| 180 | |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 181 | /* |
| 182 | ** Return the length of the token that begins at z[0]. Return |
| 183 | ** -1 if the token is (or might be) incomplete. Store the token |
| 184 | ** type in *tokenType before returning. |
| 185 | */ |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 186 | static int sqliteGetToken(const unsigned char *z, int *tokenType){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 187 | int i; |
| 188 | switch( *z ){ |
drh | 30cab80 | 2000-08-09 17:17:25 +0000 | [diff] [blame] | 189 | case ' ': case '\t': case '\n': case '\f': case '\r': { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 190 | for(i=1; z[i] && isspace(z[i]); i++){} |
| 191 | *tokenType = TK_SPACE; |
| 192 | return i; |
| 193 | } |
| 194 | case '-': { |
| 195 | if( z[1]==0 ) return -1; |
| 196 | if( z[1]=='-' ){ |
| 197 | for(i=2; z[i] && z[i]!='\n'; i++){} |
| 198 | *tokenType = TK_COMMENT; |
| 199 | return i; |
| 200 | } |
| 201 | *tokenType = TK_MINUS; |
| 202 | return 1; |
| 203 | } |
| 204 | case '(': { |
| 205 | *tokenType = TK_LP; |
| 206 | return 1; |
| 207 | } |
| 208 | case ')': { |
| 209 | *tokenType = TK_RP; |
| 210 | return 1; |
| 211 | } |
| 212 | case ';': { |
| 213 | *tokenType = TK_SEMI; |
| 214 | return 1; |
| 215 | } |
| 216 | case '+': { |
| 217 | *tokenType = TK_PLUS; |
| 218 | return 1; |
| 219 | } |
| 220 | case '*': { |
| 221 | *tokenType = TK_STAR; |
| 222 | return 1; |
| 223 | } |
| 224 | case '/': { |
| 225 | *tokenType = TK_SLASH; |
| 226 | return 1; |
| 227 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 228 | case '%': { |
| 229 | *tokenType = TK_REM; |
| 230 | return 1; |
| 231 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 232 | case '=': { |
| 233 | *tokenType = TK_EQ; |
| 234 | return 1 + (z[1]=='='); |
| 235 | } |
| 236 | case '<': { |
| 237 | if( z[1]=='=' ){ |
| 238 | *tokenType = TK_LE; |
| 239 | return 2; |
| 240 | }else if( z[1]=='>' ){ |
| 241 | *tokenType = TK_NE; |
| 242 | return 2; |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 243 | }else if( z[1]=='<' ){ |
| 244 | *tokenType = TK_LSHIFT; |
| 245 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 246 | }else{ |
| 247 | *tokenType = TK_LT; |
| 248 | return 1; |
| 249 | } |
| 250 | } |
| 251 | case '>': { |
| 252 | if( z[1]=='=' ){ |
| 253 | *tokenType = TK_GE; |
| 254 | return 2; |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 255 | }else if( z[1]=='>' ){ |
| 256 | *tokenType = TK_RSHIFT; |
| 257 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 258 | }else{ |
| 259 | *tokenType = TK_GT; |
| 260 | return 1; |
| 261 | } |
| 262 | } |
| 263 | case '!': { |
| 264 | if( z[1]!='=' ){ |
| 265 | *tokenType = TK_ILLEGAL; |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 266 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 267 | }else{ |
| 268 | *tokenType = TK_NE; |
| 269 | return 2; |
| 270 | } |
| 271 | } |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 272 | case '|': { |
| 273 | if( z[1]!='|' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 274 | *tokenType = TK_BITOR; |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 275 | return 1; |
| 276 | }else{ |
| 277 | *tokenType = TK_CONCAT; |
| 278 | return 2; |
| 279 | } |
| 280 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 281 | case ',': { |
| 282 | *tokenType = TK_COMMA; |
| 283 | return 1; |
| 284 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 285 | case '&': { |
| 286 | *tokenType = TK_BITAND; |
| 287 | return 1; |
| 288 | } |
| 289 | case '~': { |
| 290 | *tokenType = TK_BITNOT; |
| 291 | return 1; |
| 292 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 293 | case '\'': case '"': { |
| 294 | int delim = z[0]; |
| 295 | for(i=1; z[i]; i++){ |
| 296 | if( z[i]==delim ){ |
| 297 | if( z[i+1]==delim ){ |
| 298 | i++; |
| 299 | }else{ |
| 300 | break; |
| 301 | } |
| 302 | } |
| 303 | } |
| 304 | if( z[i] ) i++; |
| 305 | *tokenType = TK_STRING; |
| 306 | return i; |
| 307 | } |
| 308 | case '.': { |
| 309 | if( !isdigit(z[1]) ){ |
| 310 | *tokenType = TK_DOT; |
| 311 | return 1; |
| 312 | } |
| 313 | /* Fall thru into the next case */ |
| 314 | } |
| 315 | case '0': case '1': case '2': case '3': case '4': |
| 316 | case '5': case '6': case '7': case '8': case '9': { |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 317 | *tokenType = TK_INTEGER; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 318 | for(i=1; z[i] && isdigit(z[i]); i++){} |
| 319 | if( z[i]=='.' ){ |
| 320 | i++; |
| 321 | while( z[i] && isdigit(z[i]) ){ i++; } |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 322 | *tokenType = TK_FLOAT; |
| 323 | } |
| 324 | if( (z[i]=='e' || z[i]=='E') && |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 325 | ( isdigit(z[i+1]) |
| 326 | || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
| 327 | ) |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 328 | ){ |
| 329 | i += 2; |
| 330 | while( z[i] && isdigit(z[i]) ){ i++; } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 331 | *tokenType = TK_FLOAT; |
| 332 | }else if( z[0]=='.' ){ |
| 333 | *tokenType = TK_FLOAT; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 334 | } |
| 335 | return i; |
| 336 | } |
drh | 2f4392f | 2002-02-14 21:42:51 +0000 | [diff] [blame] | 337 | case '[': { |
| 338 | for(i=1; z[i] && z[i-1]!=']'; i++){} |
| 339 | *tokenType = TK_ID; |
| 340 | return i; |
| 341 | } |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 342 | default: { |
| 343 | if( !isIdChar[*z] ){ |
| 344 | break; |
| 345 | } |
| 346 | for(i=1; isIdChar[z[i]]; i++){} |
drh | 6a53534 | 2001-10-19 16:44:56 +0000 | [diff] [blame] | 347 | *tokenType = sqliteKeywordCode((char*)z, i); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 348 | return i; |
| 349 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 350 | } |
| 351 | *tokenType = TK_ILLEGAL; |
| 352 | return 1; |
| 353 | } |
| 354 | |
| 355 | /* |
| 356 | ** Run the parser on the given SQL string. The parser structure is |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 357 | ** passed in. An SQLITE_ status code is returned. If an error occurs |
| 358 | ** and pzErrMsg!=NULL then an error message might be written into |
| 359 | ** memory obtained from malloc() and *pzErrMsg made to point to that |
| 360 | ** error message. Or maybe not. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 361 | */ |
drh | 80ff32f | 2001-11-04 18:32:46 +0000 | [diff] [blame] | 362 | int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 363 | int nErr = 0; |
| 364 | int i; |
| 365 | void *pEngine; |
| 366 | int once = 1; |
drh | 6d4abfb | 2001-10-22 02:58:08 +0000 | [diff] [blame] | 367 | sqlite *db = pParse->db; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 368 | extern void *sqliteParserAlloc(void*(*)(int)); |
| 369 | extern void sqliteParserFree(void*, void(*)(void*)); |
drh | 338ea13 | 2001-02-11 16:56:24 +0000 | [diff] [blame] | 370 | extern int sqliteParser(void*, int, Token, Parse*); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 371 | |
drh | 6d4abfb | 2001-10-22 02:58:08 +0000 | [diff] [blame] | 372 | db->flags &= ~SQLITE_Interrupt; |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 373 | pParse->rc = SQLITE_OK; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 374 | i = 0; |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 375 | pEngine = sqliteParserAlloc((void*(*)(int))malloc); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 376 | if( pEngine==0 ){ |
| 377 | sqliteSetString(pzErrMsg, "out of memory", 0); |
| 378 | return 1; |
| 379 | } |
drh | daffd0e | 2001-04-11 14:28:42 +0000 | [diff] [blame] | 380 | while( sqlite_malloc_failed==0 && nErr==0 && i>=0 && zSql[i]!=0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 381 | int tokenType; |
| 382 | |
drh | 6d4abfb | 2001-10-22 02:58:08 +0000 | [diff] [blame] | 383 | if( (db->flags & SQLITE_Interrupt)!=0 ){ |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 384 | pParse->rc = SQLITE_INTERRUPT; |
| 385 | sqliteSetString(pzErrMsg, "interrupt", 0); |
| 386 | break; |
| 387 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 388 | pParse->sLastToken.z = &zSql[i]; |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 389 | pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 390 | i += pParse->sLastToken.n; |
| 391 | if( once ){ |
| 392 | pParse->sFirstToken = pParse->sLastToken; |
| 393 | once = 0; |
| 394 | } |
| 395 | switch( tokenType ){ |
| 396 | case TK_SPACE: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 397 | case TK_COMMENT: { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 398 | break; |
| 399 | } |
| 400 | case TK_ILLEGAL: |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 401 | sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1, |
| 402 | pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 403 | nErr++; |
| 404 | break; |
| 405 | default: |
| 406 | sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse); |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 407 | if( pParse->zErrMsg && pParse->sErrToken.z ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 408 | sqliteSetNString(pzErrMsg, "near \"", -1, |
| 409 | pParse->sErrToken.z, pParse->sErrToken.n, |
| 410 | "\": ", -1, |
| 411 | pParse->zErrMsg, -1, |
| 412 | 0); |
| 413 | nErr++; |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 414 | sqliteFree(pParse->zErrMsg); |
| 415 | pParse->zErrMsg = 0; |
drh | ecdc753 | 2001-09-23 02:35:53 +0000 | [diff] [blame] | 416 | }else if( pParse->rc!=SQLITE_OK ){ |
drh | 6d4abfb | 2001-10-22 02:58:08 +0000 | [diff] [blame] | 417 | sqliteSetString(pzErrMsg, sqlite_error_string(pParse->rc), 0); |
drh | ecdc753 | 2001-09-23 02:35:53 +0000 | [diff] [blame] | 418 | nErr++; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 419 | } |
| 420 | break; |
| 421 | } |
| 422 | } |
drh | a76b5df | 2002-02-23 02:32:10 +0000 | [diff] [blame^] | 423 | if( zSql[i]==0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 424 | sqliteParser(pEngine, 0, pParse->sLastToken, pParse); |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 425 | if( pParse->zErrMsg && pParse->sErrToken.z ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 426 | sqliteSetNString(pzErrMsg, "near \"", -1, |
| 427 | pParse->sErrToken.z, pParse->sErrToken.n, |
| 428 | "\": ", -1, |
| 429 | pParse->zErrMsg, -1, |
| 430 | 0); |
| 431 | nErr++; |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 432 | sqliteFree(pParse->zErrMsg); |
| 433 | pParse->zErrMsg = 0; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 434 | } |
| 435 | } |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 436 | sqliteParserFree(pEngine, free); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 437 | if( pParse->zErrMsg ){ |
| 438 | if( pzErrMsg ){ |
drh | 6e142f5 | 2000-06-08 13:36:40 +0000 | [diff] [blame] | 439 | sqliteFree(*pzErrMsg); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 440 | *pzErrMsg = pParse->zErrMsg; |
| 441 | }else{ |
| 442 | sqliteFree(pParse->zErrMsg); |
| 443 | } |
| 444 | if( !nErr ) nErr++; |
| 445 | } |
| 446 | if( pParse->pVdbe ){ |
| 447 | sqliteVdbeDelete(pParse->pVdbe); |
| 448 | pParse->pVdbe = 0; |
| 449 | } |
| 450 | if( pParse->pNewTable ){ |
| 451 | sqliteDeleteTable(pParse->db, pParse->pNewTable); |
| 452 | pParse->pNewTable = 0; |
| 453 | } |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 454 | if( nErr>0 && pParse->rc==SQLITE_OK ){ |
| 455 | pParse->rc = SQLITE_ERROR; |
| 456 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 457 | return nErr; |
| 458 | } |