drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 1 | /* |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 2 | ** 2001 September 15 |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 3 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 4 | ** The author disclaims copyright to this source code. In place of |
| 5 | ** a legal notice, here is a blessing: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 6 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 7 | ** May you do good and not evil. |
| 8 | ** May you find forgiveness for yourself and forgive others. |
| 9 | ** May you share freely, never taking more than you give. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 10 | ** |
| 11 | ************************************************************************* |
| 12 | ** A tokenizer for SQL |
| 13 | ** |
| 14 | ** This file contains C code that splits an SQL input string up into |
| 15 | ** individual tokens and sends those tokens one-by-one over to the |
| 16 | ** parser for analysis. |
| 17 | ** |
drh | 61b487d | 2003-09-12 02:08:14 +0000 | [diff] [blame^] | 18 | ** $Id: tokenize.c,v 1.63 2003/09/12 02:08:15 drh Exp $ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 19 | */ |
| 20 | #include "sqliteInt.h" |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 21 | #include "os.h" |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 22 | #include <ctype.h> |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 23 | #include <stdlib.h> |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 24 | |
/*
** All the keywords of the SQL language are stored in a hash
** table composed of instances of the following structure.
**
** Entries that hash to the same slot are chained through pNext.
*/
typedef struct Keyword Keyword;
struct Keyword {
  const char *zName;       /* The keyword name (points at a string literal) */
  int len;                 /* Number of characters in the keyword */
  int tokenType;           /* The token value for this keyword */
  Keyword *pNext;          /* Next keyword with the same hash */
};
| 36 | |
| 37 | /* |
| 38 | ** These are the keywords |
| 39 | */ |
| 40 | static Keyword aKeywordTable[] = { |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 41 | { "ABORT", 0, TK_ABORT, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 42 | { "AFTER", 0, TK_AFTER, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 43 | { "ALL", 0, TK_ALL, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 44 | { "AND", 0, TK_AND, 0 }, |
| 45 | { "AS", 0, TK_AS, 0 }, |
| 46 | { "ASC", 0, TK_ASC, 0 }, |
drh | 113088e | 2003-03-20 01:16:58 +0000 | [diff] [blame] | 47 | { "ATTACH", 0, TK_ATTACH, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 48 | { "BEFORE", 0, TK_BEFORE, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 49 | { "BEGIN", 0, TK_BEGIN, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 50 | { "BETWEEN", 0, TK_BETWEEN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 51 | { "BY", 0, TK_BY, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 52 | { "CASCADE", 0, TK_CASCADE, 0 }, |
drh | 17a7f8d | 2002-03-24 13:13:27 +0000 | [diff] [blame] | 53 | { "CASE", 0, TK_CASE, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 54 | { "CHECK", 0, TK_CHECK, 0 }, |
drh | f57b14a | 2001-09-14 18:54:08 +0000 | [diff] [blame] | 55 | { "CLUSTER", 0, TK_CLUSTER, 0 }, |
drh | 8e2ca02 | 2002-06-17 17:07:19 +0000 | [diff] [blame] | 56 | { "COLLATE", 0, TK_COLLATE, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 57 | { "COMMIT", 0, TK_COMMIT, 0 }, |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 58 | { "CONFLICT", 0, TK_CONFLICT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 59 | { "CONSTRAINT", 0, TK_CONSTRAINT, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 60 | { "COPY", 0, TK_COPY, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 61 | { "CREATE", 0, TK_CREATE, 0 }, |
drh | 5ad1a6c | 2002-07-01 12:27:09 +0000 | [diff] [blame] | 62 | { "CROSS", 0, TK_JOIN_KW, 0 }, |
drh | 113088e | 2003-03-20 01:16:58 +0000 | [diff] [blame] | 63 | { "DATABASE", 0, TK_DATABASE, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 64 | { "DEFAULT", 0, TK_DEFAULT, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 65 | { "DEFERRED", 0, TK_DEFERRED, 0 }, |
| 66 | { "DEFERRABLE", 0, TK_DEFERRABLE, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 67 | { "DELETE", 0, TK_DELETE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 68 | { "DELIMITERS", 0, TK_DELIMITERS, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 69 | { "DESC", 0, TK_DESC, 0 }, |
drh | 113088e | 2003-03-20 01:16:58 +0000 | [diff] [blame] | 70 | { "DETACH", 0, TK_DETACH, 0 }, |
drh | efb7251 | 2000-05-31 20:00:52 +0000 | [diff] [blame] | 71 | { "DISTINCT", 0, TK_DISTINCT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 72 | { "DROP", 0, TK_DROP, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 73 | { "END", 0, TK_END, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 74 | { "EACH", 0, TK_EACH, 0 }, |
drh | 17a7f8d | 2002-03-24 13:13:27 +0000 | [diff] [blame] | 75 | { "ELSE", 0, TK_ELSE, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 76 | { "EXCEPT", 0, TK_EXCEPT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 77 | { "EXPLAIN", 0, TK_EXPLAIN, 0 }, |
drh | 1c92853 | 2002-01-31 15:54:21 +0000 | [diff] [blame] | 78 | { "FAIL", 0, TK_FAIL, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 79 | { "FOR", 0, TK_FOR, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 80 | { "FOREIGN", 0, TK_FOREIGN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 81 | { "FROM", 0, TK_FROM, 0 }, |
drh | 5ad1a6c | 2002-07-01 12:27:09 +0000 | [diff] [blame] | 82 | { "FULL", 0, TK_JOIN_KW, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 83 | { "GLOB", 0, TK_GLOB, 0 }, |
drh | 2282792 | 2000-06-06 17:27:05 +0000 | [diff] [blame] | 84 | { "GROUP", 0, TK_GROUP, 0 }, |
| 85 | { "HAVING", 0, TK_HAVING, 0 }, |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 86 | { "IGNORE", 0, TK_IGNORE, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 87 | { "IMMEDIATE", 0, TK_IMMEDIATE, 0 }, |
drh | fef5208 | 2000-06-06 01:50:43 +0000 | [diff] [blame] | 88 | { "IN", 0, TK_IN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 89 | { "INDEX", 0, TK_INDEX, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 90 | { "INITIALLY", 0, TK_INITIALLY, 0 }, |
drh | 5ad1a6c | 2002-07-01 12:27:09 +0000 | [diff] [blame] | 91 | { "INNER", 0, TK_JOIN_KW, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 92 | { "INSERT", 0, TK_INSERT, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 93 | { "INSTEAD", 0, TK_INSTEAD, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 94 | { "INTERSECT", 0, TK_INTERSECT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 95 | { "INTO", 0, TK_INTO, 0 }, |
| 96 | { "IS", 0, TK_IS, 0 }, |
| 97 | { "ISNULL", 0, TK_ISNULL, 0 }, |
drh | 01f3f25 | 2002-05-24 16:14:15 +0000 | [diff] [blame] | 98 | { "JOIN", 0, TK_JOIN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 99 | { "KEY", 0, TK_KEY, 0 }, |
drh | 5ad1a6c | 2002-07-01 12:27:09 +0000 | [diff] [blame] | 100 | { "LEFT", 0, TK_JOIN_KW, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 101 | { "LIKE", 0, TK_LIKE, 0 }, |
drh | 9bbca4c | 2001-11-06 04:00:18 +0000 | [diff] [blame] | 102 | { "LIMIT", 0, TK_LIMIT, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 103 | { "MATCH", 0, TK_MATCH, 0 }, |
drh | 5ad1a6c | 2002-07-01 12:27:09 +0000 | [diff] [blame] | 104 | { "NATURAL", 0, TK_JOIN_KW, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 105 | { "NOT", 0, TK_NOT, 0 }, |
| 106 | { "NOTNULL", 0, TK_NOTNULL, 0 }, |
| 107 | { "NULL", 0, TK_NULL, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 108 | { "OF", 0, TK_OF, 0 }, |
drh | 9bbca4c | 2001-11-06 04:00:18 +0000 | [diff] [blame] | 109 | { "OFFSET", 0, TK_OFFSET, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 110 | { "ON", 0, TK_ON, 0 }, |
| 111 | { "OR", 0, TK_OR, 0 }, |
| 112 | { "ORDER", 0, TK_ORDER, 0 }, |
drh | 5ad1a6c | 2002-07-01 12:27:09 +0000 | [diff] [blame] | 113 | { "OUTER", 0, TK_JOIN_KW, 0 }, |
drh | f57b14a | 2001-09-14 18:54:08 +0000 | [diff] [blame] | 114 | { "PRAGMA", 0, TK_PRAGMA, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 115 | { "PRIMARY", 0, TK_PRIMARY, 0 }, |
danielk1977 | 6f34903 | 2002-06-11 02:25:40 +0000 | [diff] [blame] | 116 | { "RAISE", 0, TK_RAISE, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 117 | { "REFERENCES", 0, TK_REFERENCES, 0 }, |
drh | 9cfcf5d | 2002-01-29 18:41:24 +0000 | [diff] [blame] | 118 | { "REPLACE", 0, TK_REPLACE, 0 }, |
drh | 04738cb | 2002-06-02 18:19:00 +0000 | [diff] [blame] | 119 | { "RESTRICT", 0, TK_RESTRICT, 0 }, |
drh | 5ad1a6c | 2002-07-01 12:27:09 +0000 | [diff] [blame] | 120 | { "RIGHT", 0, TK_JOIN_KW, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 121 | { "ROLLBACK", 0, TK_ROLLBACK, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 122 | { "ROW", 0, TK_ROW, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 123 | { "SELECT", 0, TK_SELECT, 0 }, |
| 124 | { "SET", 0, TK_SET, 0 }, |
drh | 1873cd5 | 2002-05-23 00:30:31 +0000 | [diff] [blame] | 125 | { "STATEMENT", 0, TK_STATEMENT, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 126 | { "TABLE", 0, TK_TABLE, 0 }, |
drh | f57b339 | 2001-10-08 13:22:32 +0000 | [diff] [blame] | 127 | { "TEMP", 0, TK_TEMP, 0 }, |
| 128 | { "TEMPORARY", 0, TK_TEMP, 0 }, |
drh | 17a7f8d | 2002-03-24 13:13:27 +0000 | [diff] [blame] | 129 | { "THEN", 0, TK_THEN, 0 }, |
drh | c4a3c77 | 2001-04-04 11:48:57 +0000 | [diff] [blame] | 130 | { "TRANSACTION", 0, TK_TRANSACTION, 0 }, |
danielk1977 | c3f9bad | 2002-05-15 08:30:12 +0000 | [diff] [blame] | 131 | { "TRIGGER", 0, TK_TRIGGER, 0 }, |
drh | 82c3d63 | 2000-06-06 21:56:07 +0000 | [diff] [blame] | 132 | { "UNION", 0, TK_UNION, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 133 | { "UNIQUE", 0, TK_UNIQUE, 0 }, |
| 134 | { "UPDATE", 0, TK_UPDATE, 0 }, |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 135 | { "USING", 0, TK_USING, 0 }, |
drh | dce2cbe | 2000-05-31 02:27:49 +0000 | [diff] [blame] | 136 | { "VACUUM", 0, TK_VACUUM, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 137 | { "VALUES", 0, TK_VALUES, 0 }, |
drh | a76b5df | 2002-02-23 02:32:10 +0000 | [diff] [blame] | 138 | { "VIEW", 0, TK_VIEW, 0 }, |
drh | 17a7f8d | 2002-03-24 13:13:27 +0000 | [diff] [blame] | 139 | { "WHEN", 0, TK_WHEN, 0 }, |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 140 | { "WHERE", 0, TK_WHERE, 0 }, |
| 141 | }; |
| 142 | |
| 143 | /* |
| 144 | ** This is the hash table |
| 145 | */ |
drh | daffd0e | 2001-04-11 14:28:42 +0000 | [diff] [blame] | 146 | #define KEY_HASH_SIZE 71 |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 147 | static Keyword *apHashTable[KEY_HASH_SIZE]; |
| 148 | |
| 149 | |
| 150 | /* |
| 151 | ** This function looks up an identifier to determine if it is a |
| 152 | ** keyword. If it is a keyword, the token code of that keyword is |
| 153 | ** returned. If the input is not a keyword, TK_ID is returned. |
| 154 | */ |
drh | 17f7193 | 2002-02-21 12:01:27 +0000 | [diff] [blame] | 155 | int sqliteKeywordCode(const char *z, int n){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 156 | int h; |
| 157 | Keyword *p; |
| 158 | if( aKeywordTable[0].len==0 ){ |
| 159 | /* Initialize the keyword hash table */ |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 160 | sqliteOsEnterMutex(); |
| 161 | if( aKeywordTable[0].len==0 ){ |
| 162 | int i; |
| 163 | int n; |
| 164 | n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); |
| 165 | for(i=0; i<n; i++){ |
| 166 | aKeywordTable[i].len = strlen(aKeywordTable[i].zName); |
| 167 | h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len); |
| 168 | h %= KEY_HASH_SIZE; |
| 169 | aKeywordTable[i].pNext = apHashTable[h]; |
| 170 | apHashTable[h] = &aKeywordTable[i]; |
| 171 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 172 | } |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 173 | sqliteOsLeaveMutex(); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 174 | } |
| 175 | h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE; |
| 176 | for(p=apHashTable[h]; p; p=p->pNext){ |
| 177 | if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){ |
| 178 | return p->tokenType; |
| 179 | } |
| 180 | } |
| 181 | return TK_ID; |
| 182 | } |
| 183 | |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 184 | |
/*
** If X is a character that can be used in an identifier then
** isIdChar[X] will be 1.  Otherwise isIdChar[X] will be 0.
**
** In this implementation, an identifier can be a string of
** alphabetic characters, digits, and "_" plus any character
** with the high-order bit set.  The latter rule means that
** any sequence of UTF-8 characters or characters taken from
** an extended ISO8859 character set can form an identifier.
**
** The table has one entry for every possible byte value (256 in
** all), so it must be indexed with an unsigned char, never with a
** plain char that might be negative.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 8x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 9x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ax */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Bx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Cx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Dx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Fx */
};
| 214 | |
| 215 | |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 216 | /* |
drh | 61b487d | 2003-09-12 02:08:14 +0000 | [diff] [blame^] | 217 | ** Return the length of the token that begins at z[0]. |
| 218 | ** Store the token type in *tokenType before returning. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 219 | */ |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 220 | static int sqliteGetToken(const unsigned char *z, int *tokenType){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 221 | int i; |
| 222 | switch( *z ){ |
drh | 30cab80 | 2000-08-09 17:17:25 +0000 | [diff] [blame] | 223 | case ' ': case '\t': case '\n': case '\f': case '\r': { |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 224 | for(i=1; isspace(z[i]); i++){} |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 225 | *tokenType = TK_SPACE; |
| 226 | return i; |
| 227 | } |
| 228 | case '-': { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 229 | if( z[1]=='-' ){ |
| 230 | for(i=2; z[i] && z[i]!='\n'; i++){} |
| 231 | *tokenType = TK_COMMENT; |
| 232 | return i; |
| 233 | } |
| 234 | *tokenType = TK_MINUS; |
| 235 | return 1; |
| 236 | } |
| 237 | case '(': { |
drh | 1f16230 | 2002-10-27 19:35:33 +0000 | [diff] [blame] | 238 | if( z[1]=='+' && z[2]==')' ){ |
| 239 | *tokenType = TK_ORACLE_OUTER_JOIN; |
| 240 | return 3; |
| 241 | }else{ |
| 242 | *tokenType = TK_LP; |
| 243 | return 1; |
| 244 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 245 | } |
| 246 | case ')': { |
| 247 | *tokenType = TK_RP; |
| 248 | return 1; |
| 249 | } |
| 250 | case ';': { |
| 251 | *tokenType = TK_SEMI; |
| 252 | return 1; |
| 253 | } |
| 254 | case '+': { |
| 255 | *tokenType = TK_PLUS; |
| 256 | return 1; |
| 257 | } |
| 258 | case '*': { |
| 259 | *tokenType = TK_STAR; |
| 260 | return 1; |
| 261 | } |
| 262 | case '/': { |
drh | 66105a8 | 2002-08-27 14:28:29 +0000 | [diff] [blame] | 263 | if( z[1]!='*' || z[2]==0 ){ |
| 264 | *tokenType = TK_SLASH; |
| 265 | return 1; |
| 266 | } |
| 267 | for(i=3; z[i] && (z[i]!='/' || z[i-1]!='*'); i++){} |
| 268 | if( z[i] ) i++; |
| 269 | *tokenType = TK_COMMENT; |
| 270 | return i; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 271 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 272 | case '%': { |
| 273 | *tokenType = TK_REM; |
| 274 | return 1; |
| 275 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 276 | case '=': { |
| 277 | *tokenType = TK_EQ; |
| 278 | return 1 + (z[1]=='='); |
| 279 | } |
| 280 | case '<': { |
| 281 | if( z[1]=='=' ){ |
| 282 | *tokenType = TK_LE; |
| 283 | return 2; |
| 284 | }else if( z[1]=='>' ){ |
| 285 | *tokenType = TK_NE; |
| 286 | return 2; |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 287 | }else if( z[1]=='<' ){ |
| 288 | *tokenType = TK_LSHIFT; |
| 289 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 290 | }else{ |
| 291 | *tokenType = TK_LT; |
| 292 | return 1; |
| 293 | } |
| 294 | } |
| 295 | case '>': { |
| 296 | if( z[1]=='=' ){ |
| 297 | *tokenType = TK_GE; |
| 298 | return 2; |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 299 | }else if( z[1]=='>' ){ |
| 300 | *tokenType = TK_RSHIFT; |
| 301 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 302 | }else{ |
| 303 | *tokenType = TK_GT; |
| 304 | return 1; |
| 305 | } |
| 306 | } |
| 307 | case '!': { |
| 308 | if( z[1]!='=' ){ |
| 309 | *tokenType = TK_ILLEGAL; |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 310 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 311 | }else{ |
| 312 | *tokenType = TK_NE; |
| 313 | return 2; |
| 314 | } |
| 315 | } |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 316 | case '|': { |
| 317 | if( z[1]!='|' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 318 | *tokenType = TK_BITOR; |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 319 | return 1; |
| 320 | }else{ |
| 321 | *tokenType = TK_CONCAT; |
| 322 | return 2; |
| 323 | } |
| 324 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 325 | case ',': { |
| 326 | *tokenType = TK_COMMA; |
| 327 | return 1; |
| 328 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 329 | case '&': { |
| 330 | *tokenType = TK_BITAND; |
| 331 | return 1; |
| 332 | } |
| 333 | case '~': { |
| 334 | *tokenType = TK_BITNOT; |
| 335 | return 1; |
| 336 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 337 | case '\'': case '"': { |
| 338 | int delim = z[0]; |
| 339 | for(i=1; z[i]; i++){ |
| 340 | if( z[i]==delim ){ |
| 341 | if( z[i+1]==delim ){ |
| 342 | i++; |
| 343 | }else{ |
| 344 | break; |
| 345 | } |
| 346 | } |
| 347 | } |
| 348 | if( z[i] ) i++; |
| 349 | *tokenType = TK_STRING; |
| 350 | return i; |
| 351 | } |
| 352 | case '.': { |
drh | bb07e9a | 2003-04-16 02:17:35 +0000 | [diff] [blame] | 353 | *tokenType = TK_DOT; |
| 354 | return 1; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 355 | } |
| 356 | case '0': case '1': case '2': case '3': case '4': |
| 357 | case '5': case '6': case '7': case '8': case '9': { |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 358 | *tokenType = TK_INTEGER; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 359 | for(i=1; isdigit(z[i]); i++){} |
drh | bb07e9a | 2003-04-16 02:17:35 +0000 | [diff] [blame] | 360 | if( z[i]=='.' && isdigit(z[i+1]) ){ |
| 361 | i += 2; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 362 | while( isdigit(z[i]) ){ i++; } |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 363 | *tokenType = TK_FLOAT; |
| 364 | } |
| 365 | if( (z[i]=='e' || z[i]=='E') && |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 366 | ( isdigit(z[i+1]) |
| 367 | || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
| 368 | ) |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 369 | ){ |
| 370 | i += 2; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 371 | while( isdigit(z[i]) ){ i++; } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 372 | *tokenType = TK_FLOAT; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 373 | } |
| 374 | return i; |
| 375 | } |
drh | 2f4392f | 2002-02-14 21:42:51 +0000 | [diff] [blame] | 376 | case '[': { |
| 377 | for(i=1; z[i] && z[i-1]!=']'; i++){} |
| 378 | *tokenType = TK_ID; |
| 379 | return i; |
| 380 | } |
drh | 7c972de | 2003-09-06 22:18:07 +0000 | [diff] [blame] | 381 | case '?': { |
drh | 5045789 | 2003-09-06 01:10:47 +0000 | [diff] [blame] | 382 | *tokenType = TK_VARIABLE; |
drh | 7c972de | 2003-09-06 22:18:07 +0000 | [diff] [blame] | 383 | return 1; |
drh | 5045789 | 2003-09-06 01:10:47 +0000 | [diff] [blame] | 384 | } |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 385 | default: { |
| 386 | if( !isIdChar[*z] ){ |
| 387 | break; |
| 388 | } |
| 389 | for(i=1; isIdChar[z[i]]; i++){} |
drh | 6a53534 | 2001-10-19 16:44:56 +0000 | [diff] [blame] | 390 | *tokenType = sqliteKeywordCode((char*)z, i); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 391 | return i; |
| 392 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 393 | } |
| 394 | *tokenType = TK_ILLEGAL; |
| 395 | return 1; |
| 396 | } |
| 397 | |
| 398 | /* |
| 399 | ** Run the parser on the given SQL string. The parser structure is |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 400 | ** passed in. An SQLITE_ status code is returned. If an error occurs |
| 401 | ** and pzErrMsg!=NULL then an error message might be written into |
| 402 | ** memory obtained from malloc() and *pzErrMsg made to point to that |
| 403 | ** error message. Or maybe not. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 404 | */ |
drh | 80ff32f | 2001-11-04 18:32:46 +0000 | [diff] [blame] | 405 | int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 406 | int nErr = 0; |
| 407 | int i; |
| 408 | void *pEngine; |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 409 | int tokenType; |
| 410 | int lastTokenParsed = -1; |
drh | 6d4abfb | 2001-10-22 02:58:08 +0000 | [diff] [blame] | 411 | sqlite *db = pParse->db; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 412 | extern void *sqliteParserAlloc(void*(*)(int)); |
| 413 | extern void sqliteParserFree(void*, void(*)(void*)); |
drh | 338ea13 | 2001-02-11 16:56:24 +0000 | [diff] [blame] | 414 | extern int sqliteParser(void*, int, Token, Parse*); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 415 | |
drh | 6d4abfb | 2001-10-22 02:58:08 +0000 | [diff] [blame] | 416 | db->flags &= ~SQLITE_Interrupt; |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 417 | pParse->rc = SQLITE_OK; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 418 | i = 0; |
drh | 982cef7 | 2000-05-30 16:27:03 +0000 | [diff] [blame] | 419 | pEngine = sqliteParserAlloc((void*(*)(int))malloc); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 420 | if( pEngine==0 ){ |
| 421 | sqliteSetString(pzErrMsg, "out of memory", 0); |
| 422 | return 1; |
| 423 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 424 | pParse->sLastToken.dyn = 0; |
drh | 326dce7 | 2003-01-29 14:06:07 +0000 | [diff] [blame] | 425 | pParse->zTail = zSql; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 426 | while( sqlite_malloc_failed==0 && zSql[i]!=0 ){ |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 427 | assert( i>=0 ); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 428 | pParse->sLastToken.z = &zSql[i]; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 429 | assert( pParse->sLastToken.dyn==0 ); |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 430 | pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 431 | i += pParse->sLastToken.n; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 432 | switch( tokenType ){ |
| 433 | case TK_SPACE: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 434 | case TK_COMMENT: { |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 435 | if( (db->flags & SQLITE_Interrupt)!=0 ){ |
| 436 | pParse->rc = SQLITE_INTERRUPT; |
| 437 | sqliteSetString(pzErrMsg, "interrupt", 0); |
| 438 | goto abort_parse; |
| 439 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 440 | break; |
| 441 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 442 | case TK_ILLEGAL: { |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 443 | sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1, |
| 444 | pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 445 | nErr++; |
drh | caec2f1 | 2003-01-07 02:47:47 +0000 | [diff] [blame] | 446 | goto abort_parse; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 447 | } |
drh | 326dce7 | 2003-01-29 14:06:07 +0000 | [diff] [blame] | 448 | case TK_SEMI: { |
| 449 | pParse->zTail = &zSql[i]; |
| 450 | /* Fall thru into the default case */ |
| 451 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 452 | default: { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 453 | sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 454 | lastTokenParsed = tokenType; |
| 455 | if( pParse->rc!=SQLITE_OK ){ |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 456 | goto abort_parse; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 457 | } |
| 458 | break; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 459 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 460 | } |
| 461 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 462 | abort_parse: |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 463 | if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ |
| 464 | if( lastTokenParsed!=TK_SEMI ){ |
| 465 | sqliteParser(pEngine, TK_SEMI, pParse->sLastToken, pParse); |
drh | 326dce7 | 2003-01-29 14:06:07 +0000 | [diff] [blame] | 466 | pParse->zTail = &zSql[i]; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 467 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 468 | sqliteParser(pEngine, 0, pParse->sLastToken, pParse); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 469 | } |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 470 | sqliteParserFree(pEngine, free); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 471 | if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ |
| 472 | sqliteSetString(&pParse->zErrMsg, sqlite_error_string(pParse->rc), 0); |
| 473 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 474 | if( pParse->zErrMsg ){ |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 475 | if( pzErrMsg && *pzErrMsg==0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 476 | *pzErrMsg = pParse->zErrMsg; |
| 477 | }else{ |
| 478 | sqliteFree(pParse->zErrMsg); |
| 479 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 480 | pParse->zErrMsg = 0; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 481 | if( !nErr ) nErr++; |
| 482 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 483 | if( pParse->pVdbe && (pParse->useCallback || pParse->nErr>0) ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 484 | sqliteVdbeDelete(pParse->pVdbe); |
| 485 | pParse->pVdbe = 0; |
| 486 | } |
| 487 | if( pParse->pNewTable ){ |
| 488 | sqliteDeleteTable(pParse->db, pParse->pNewTable); |
| 489 | pParse->pNewTable = 0; |
| 490 | } |
drh | f0f258b | 2003-04-21 18:48:45 +0000 | [diff] [blame] | 491 | if( pParse->pNewTrigger ){ |
| 492 | sqliteDeleteTrigger(pParse->pNewTrigger); |
| 493 | pParse->pNewTrigger = 0; |
| 494 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 495 | if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 496 | pParse->rc = SQLITE_ERROR; |
| 497 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 498 | return nErr; |
| 499 | } |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 500 | |
/*
** Token types used by the sqlite_complete() routine.  See the header
** comments on that procedure for additional information.
**
** These values are used as the second index into the state transition
** matrix in sqlite_complete(), so they must remain consecutive
** integers beginning with zero.
*/
#define tkEXPLAIN 0   /* The "explain" keyword */
#define tkCREATE  1   /* The "create" keyword */
#define tkTEMP    2   /* The "temp" or "temporary" keyword */
#define tkTRIGGER 3   /* The "trigger" keyword */
#define tkEND     4   /* The "end" keyword */
#define tkSEMI    5   /* A semicolon */
#define tkWS      6   /* Whitespace */
#define tkOTHER   7   /* Any other SQL token */
| 513 | |
| 514 | /* |
| 515 | ** Return TRUE if the given SQL string ends in a semicolon. |
| 516 | ** |
| 517 | ** Special handling is required for CREATE TRIGGER statements. |
| 518 | ** Whenever the CREATE TRIGGER keywords are seen, the statement |
| 519 | ** must end with ";END;". |
| 520 | ** |
| 521 | ** This implementation uses a state machine with 7 states: |
| 522 | ** |
| 523 | ** (0) START At the beginning or end of an SQL statement. This routine |
| 524 | ** returns 1 if it ends in the START state and 0 if it ends |
| 525 | ** in any other state. |
| 526 | ** |
| 527 | ** (1) EXPLAIN The keyword EXPLAIN has been seen at the beginning of |
| 528 | ** a statement. |
| 529 | ** |
| 530 | ** (2) CREATE The keyword CREATE has been seen at the beginning of a |
| 531 | ** statement, possibly preceded by EXPLAIN and/or followed by |
| 532 | ** TEMP or TEMPORARY |
| 533 | ** |
| 534 | ** (3) NORMAL We are in the middle of statement which ends with a single |
| 535 | ** semicolon. |
| 536 | ** |
| 537 | ** (4) TRIGGER We are in the middle of a trigger definition that must be |
| 538 | ** ended by a semicolon, the keyword END, and another semicolon. |
| 539 | ** |
| 540 | ** (5) SEMI We've seen the first semicolon in the ";END;" that occurs at |
| 541 | ** the end of a trigger definition. |
| 542 | ** |
| 543 | ** (6) END We've seen the ";END" of the ";END;" that occurs at the end |
| 544 | ** of a trigger definition. |
| 545 | ** |
| 546 | ** Transitions between states above are determined by tokens extracted |
| 547 | ** from the input. The following tokens are significant: |
| 548 | ** |
| 549 | ** (0) tkEXPLAIN The "explain" keyword. |
| 550 | ** (1) tkCREATE The "create" keyword. |
| 551 | ** (2) tkTEMP The "temp" or "temporary" keyword. |
| 552 | ** (3) tkTRIGGER The "trigger" keyword. |
| 553 | ** (4) tkEND The "end" keyword. |
| 554 | ** (5) tkSEMI A semicolon. |
| 555 | ** (6) tkWS Whitespace |
| 556 | ** (7) tkOTHER Any other SQL token. |
| 557 | ** |
| 558 | ** Whitespace never causes a state transition and is always ignored. |
| 559 | */ |
| 560 | int sqlite_complete(const char *zSql){ |
| 561 | u8 state = 0; /* Current state, using numbers defined in header comment */ |
| 562 | u8 token; /* Value of the next token */ |
| 563 | |
| 564 | /* The following matrix defines the transition from one state to another |
| 565 | ** according to what token is seen. trans[state][token] returns the |
| 566 | ** next state. |
| 567 | */ |
| 568 | static const u8 trans[7][8] = { |
drh | e1e38c4 | 2003-05-04 18:30:59 +0000 | [diff] [blame] | 569 | /* Token: */ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 570 | /* State: ** EXPLAIN CREATE TEMP TRIGGER END SEMI WS OTHER */ |
| 571 | /* 0 START: */ { 1, 2, 3, 3, 3, 0, 0, 3, }, |
| 572 | /* 1 EXPLAIN: */ { 3, 2, 3, 3, 3, 0, 1, 3, }, |
| 573 | /* 2 CREATE: */ { 3, 3, 2, 4, 3, 0, 2, 3, }, |
| 574 | /* 3 NORMAL: */ { 3, 3, 3, 3, 3, 0, 3, 3, }, |
| 575 | /* 4 TRIGGER: */ { 4, 4, 4, 4, 4, 5, 4, 4, }, |
| 576 | /* 5 SEMI: */ { 4, 4, 4, 4, 6, 5, 5, 4, }, |
| 577 | /* 6 END: */ { 4, 4, 4, 4, 4, 0, 6, 4, }, |
| 578 | }; |
| 579 | |
| 580 | while( *zSql ){ |
| 581 | switch( *zSql ){ |
| 582 | case ';': { /* A semicolon */ |
| 583 | token = tkSEMI; |
| 584 | break; |
| 585 | } |
| 586 | case ' ': |
| 587 | case '\r': |
| 588 | case '\t': |
| 589 | case '\n': |
| 590 | case '\f': { /* White space is ignored */ |
| 591 | token = tkWS; |
| 592 | break; |
| 593 | } |
| 594 | case '/': { /* C-style comments */ |
| 595 | if( zSql[1]!='*' ){ |
| 596 | token = tkOTHER; |
| 597 | break; |
| 598 | } |
| 599 | zSql += 2; |
| 600 | while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } |
| 601 | if( zSql[0]==0 ) return 0; |
| 602 | zSql++; |
| 603 | token = tkWS; |
| 604 | break; |
| 605 | } |
| 606 | case '-': { /* SQL-style comments from "--" to end of line */ |
| 607 | if( zSql[1]!='-' ){ |
| 608 | token = tkOTHER; |
| 609 | break; |
| 610 | } |
| 611 | while( *zSql && *zSql!='\n' ){ zSql++; } |
| 612 | if( *zSql==0 ) return state==0; |
| 613 | token = tkWS; |
| 614 | break; |
| 615 | } |
| 616 | case '[': { /* Microsoft-style identifiers in [...] */ |
| 617 | zSql++; |
| 618 | while( *zSql && *zSql!=']' ){ zSql++; } |
| 619 | if( *zSql==0 ) return 0; |
| 620 | token = tkOTHER; |
| 621 | break; |
| 622 | } |
| 623 | case '"': /* single- and double-quoted strings */ |
| 624 | case '\'': { |
| 625 | int c = *zSql; |
| 626 | zSql++; |
| 627 | while( *zSql && *zSql!=c ){ zSql++; } |
| 628 | if( *zSql==0 ) return 0; |
| 629 | token = tkOTHER; |
| 630 | break; |
| 631 | } |
| 632 | default: { |
drh | e1e38c4 | 2003-05-04 18:30:59 +0000 | [diff] [blame] | 633 | if( isIdChar[(u8)*zSql] ){ |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 634 | /* Keywords and unquoted identifiers */ |
| 635 | int nId; |
drh | e1e38c4 | 2003-05-04 18:30:59 +0000 | [diff] [blame] | 636 | for(nId=1; isIdChar[(u8)zSql[nId]]; nId++){} |
drh | 7ad4334 | 2003-05-04 17:58:25 +0000 | [diff] [blame] | 637 | switch( *zSql ){ |
| 638 | case 'c': case 'C': { |
| 639 | if( nId==6 && sqliteStrNICmp(zSql, "create", 6)==0 ){ |
| 640 | token = tkCREATE; |
| 641 | }else{ |
| 642 | token = tkOTHER; |
| 643 | } |
| 644 | break; |
| 645 | } |
| 646 | case 't': case 'T': { |
| 647 | if( nId==7 && sqliteStrNICmp(zSql, "trigger", 7)==0 ){ |
| 648 | token = tkTRIGGER; |
| 649 | }else if( nId==4 && sqliteStrNICmp(zSql, "temp", 4)==0 ){ |
| 650 | token = tkTEMP; |
| 651 | }else if( nId==9 && sqliteStrNICmp(zSql, "temporary", 9)==0 ){ |
| 652 | token = tkTEMP; |
| 653 | }else{ |
| 654 | token = tkOTHER; |
| 655 | } |
| 656 | break; |
| 657 | } |
| 658 | case 'e': case 'E': { |
| 659 | if( nId==3 && sqliteStrNICmp(zSql, "end", 3)==0 ){ |
| 660 | token = tkEND; |
| 661 | }else if( nId==7 && sqliteStrNICmp(zSql, "explain", 7)==0 ){ |
| 662 | token = tkEXPLAIN; |
| 663 | }else{ |
| 664 | token = tkOTHER; |
| 665 | } |
| 666 | break; |
| 667 | } |
| 668 | default: { |
| 669 | token = tkOTHER; |
| 670 | break; |
| 671 | } |
| 672 | } |
| 673 | zSql += nId-1; |
| 674 | }else{ |
| 675 | /* Operators and special symbols */ |
| 676 | token = tkOTHER; |
| 677 | } |
| 678 | break; |
| 679 | } |
| 680 | } |
| 681 | state = trans[state][token]; |
| 682 | zSql++; |
| 683 | } |
| 684 | return state==0; |
| 685 | } |