/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that implements the sqlite3_complete() API.
** This code used to be part of the tokenize.c source file.  But by
** separating it out, the code will be automatically omitted from
** static links that do not use it.
*/
#include "sqliteInt.h"
#ifndef SQLITE_OMIT_COMPLETE
| 21 | |
/*
** IdChar(C) is true if character C can be part of a keyword or an
** unquoted identifier.  The macro is defined in tokenize.c.  We just
** have to import the definition.  When SQLITE_AMALGAMATION is defined
** this local copy is skipped (presumably because the tokenize.c
** definition is then already in scope).
**
** The EBCDIC variant stores its argument in a variable named "c", so a
** suitable "c" must be declared wherever that variant is expanded.
**
** The argument is parenthesized in both variants so that a compound
** expression passed as C is cast/assigned as a whole.
*/
#ifndef SQLITE_AMALGAMATION
#ifdef SQLITE_ASCII
#define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)(C)]&0x46)!=0)
#endif
#ifdef SQLITE_EBCDIC
extern const char sqlite3IsEbcdicIdChar[];
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
#endif
#endif /* SQLITE_AMALGAMATION */
| 34 | |
| 35 | |
/*
** Token types used by the sqlite3_complete() routine.  See the header
** comments on that procedure for additional information.
**
** Each value is used as the column index into the state-transition
** table of sqlite3_complete(), so the numbering must match the column
** order of that table.  The keyword tokens are only needed when trigger
** support is compiled in.
*/
#define tkSEMI    0   /* A semicolon */
#define tkWS      1   /* Whitespace (comments are reported as whitespace) */
#define tkOTHER   2   /* Any other SQL token */
#ifndef SQLITE_OMIT_TRIGGER
#define tkEXPLAIN 3   /* The "explain" keyword */
#define tkCREATE  4   /* The "create" keyword */
#define tkTEMP    5   /* The "temp" or "temporary" keyword */
#define tkTRIGGER 6   /* The "trigger" keyword */
#define tkEND     7   /* The "end" keyword */
#endif
| 50 | |
| 51 | /* |
| 52 | ** Return TRUE if the given SQL string ends in a semicolon. |
| 53 | ** |
| 54 | ** Special handling is require for CREATE TRIGGER statements. |
| 55 | ** Whenever the CREATE TRIGGER keywords are seen, the statement |
| 56 | ** must end with ";END;". |
| 57 | ** |
| 58 | ** This implementation uses a state machine with 8 states: |
| 59 | ** |
| 60 | ** (0) INVALID We have not yet seen a non-whitespace character. |
| 61 | ** |
| 62 | ** (1) START At the beginning or end of an SQL statement. This routine |
| 63 | ** returns 1 if it ends in the START state and 0 if it ends |
| 64 | ** in any other state. |
| 65 | ** |
| 66 | ** (2) NORMAL We are in the middle of statement which ends with a single |
| 67 | ** semicolon. |
| 68 | ** |
| 69 | ** (3) EXPLAIN The keyword EXPLAIN has been seen at the beginning of |
| 70 | ** a statement. |
| 71 | ** |
| 72 | ** (4) CREATE The keyword CREATE has been seen at the beginning of a |
peter.d.reid | 60ec914 | 2014-09-06 16:39:46 +0000 | [diff] [blame] | 73 | ** statement, possibly preceded by EXPLAIN and/or followed by |
drh | 9978c97 | 2010-02-23 17:36:32 +0000 | [diff] [blame] | 74 | ** TEMP or TEMPORARY |
| 75 | ** |
| 76 | ** (5) TRIGGER We are in the middle of a trigger definition that must be |
| 77 | ** ended by a semicolon, the keyword END, and another semicolon. |
| 78 | ** |
| 79 | ** (6) SEMI We've seen the first semicolon in the ";END;" that occurs at |
| 80 | ** the end of a trigger definition. |
| 81 | ** |
| 82 | ** (7) END We've seen the ";END" of the ";END;" that occurs at the end |
peter.d.reid | 60ec914 | 2014-09-06 16:39:46 +0000 | [diff] [blame] | 83 | ** of a trigger definition. |
drh | 9978c97 | 2010-02-23 17:36:32 +0000 | [diff] [blame] | 84 | ** |
| 85 | ** Transitions between states above are determined by tokens extracted |
| 86 | ** from the input. The following tokens are significant: |
| 87 | ** |
| 88 | ** (0) tkSEMI A semicolon. |
| 89 | ** (1) tkWS Whitespace. |
| 90 | ** (2) tkOTHER Any other SQL token. |
| 91 | ** (3) tkEXPLAIN The "explain" keyword. |
| 92 | ** (4) tkCREATE The "create" keyword. |
| 93 | ** (5) tkTEMP The "temp" or "temporary" keyword. |
| 94 | ** (6) tkTRIGGER The "trigger" keyword. |
| 95 | ** (7) tkEND The "end" keyword. |
| 96 | ** |
| 97 | ** Whitespace never causes a state transition and is always ignored. |
| 98 | ** This means that a SQL string of all whitespace is invalid. |
| 99 | ** |
| 100 | ** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed |
| 101 | ** to recognize the end of a trigger can be omitted. All we have to do |
| 102 | ** is look for a semicolon that is not part of an string or comment. |
| 103 | */ |
| 104 | int sqlite3_complete(const char *zSql){ |
| 105 | u8 state = 0; /* Current state, using numbers defined in header comment */ |
| 106 | u8 token; /* Value of the next token */ |
| 107 | |
| 108 | #ifndef SQLITE_OMIT_TRIGGER |
| 109 | /* A complex statement machine used to detect the end of a CREATE TRIGGER |
| 110 | ** statement. This is the normal case. |
| 111 | */ |
| 112 | static const u8 trans[8][8] = { |
| 113 | /* Token: */ |
| 114 | /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */ |
| 115 | /* 0 INVALID: */ { 1, 0, 2, 3, 4, 2, 2, 2, }, |
| 116 | /* 1 START: */ { 1, 1, 2, 3, 4, 2, 2, 2, }, |
| 117 | /* 2 NORMAL: */ { 1, 2, 2, 2, 2, 2, 2, 2, }, |
| 118 | /* 3 EXPLAIN: */ { 1, 3, 3, 2, 4, 2, 2, 2, }, |
| 119 | /* 4 CREATE: */ { 1, 4, 2, 2, 2, 4, 5, 2, }, |
| 120 | /* 5 TRIGGER: */ { 6, 5, 5, 5, 5, 5, 5, 5, }, |
| 121 | /* 6 SEMI: */ { 6, 6, 5, 5, 5, 5, 5, 7, }, |
| 122 | /* 7 END: */ { 1, 7, 5, 5, 5, 5, 5, 5, }, |
| 123 | }; |
| 124 | #else |
| 125 | /* If triggers are not supported by this compile then the statement machine |
peter.d.reid | 60ec914 | 2014-09-06 16:39:46 +0000 | [diff] [blame] | 126 | ** used to detect the end of a statement is much simpler |
drh | 9978c97 | 2010-02-23 17:36:32 +0000 | [diff] [blame] | 127 | */ |
| 128 | static const u8 trans[3][3] = { |
| 129 | /* Token: */ |
| 130 | /* State: ** SEMI WS OTHER */ |
| 131 | /* 0 INVALID: */ { 1, 0, 2, }, |
| 132 | /* 1 START: */ { 1, 1, 2, }, |
| 133 | /* 2 NORMAL: */ { 1, 2, 2, }, |
| 134 | }; |
| 135 | #endif /* SQLITE_OMIT_TRIGGER */ |
| 136 | |
mistachkin | cd54bab | 2014-12-20 21:14:14 +0000 | [diff] [blame] | 137 | #ifdef SQLITE_ENABLE_API_ARMOR |
| 138 | if( zSql==0 ){ |
| 139 | (void)SQLITE_MISUSE_BKPT; |
| 140 | return 0; |
| 141 | } |
| 142 | #endif |
| 143 | |
drh | 9978c97 | 2010-02-23 17:36:32 +0000 | [diff] [blame] | 144 | while( *zSql ){ |
| 145 | switch( *zSql ){ |
| 146 | case ';': { /* A semicolon */ |
| 147 | token = tkSEMI; |
| 148 | break; |
| 149 | } |
| 150 | case ' ': |
| 151 | case '\r': |
| 152 | case '\t': |
| 153 | case '\n': |
| 154 | case '\f': { /* White space is ignored */ |
| 155 | token = tkWS; |
| 156 | break; |
| 157 | } |
| 158 | case '/': { /* C-style comments */ |
| 159 | if( zSql[1]!='*' ){ |
| 160 | token = tkOTHER; |
| 161 | break; |
| 162 | } |
| 163 | zSql += 2; |
| 164 | while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } |
| 165 | if( zSql[0]==0 ) return 0; |
| 166 | zSql++; |
| 167 | token = tkWS; |
| 168 | break; |
| 169 | } |
| 170 | case '-': { /* SQL-style comments from "--" to end of line */ |
| 171 | if( zSql[1]!='-' ){ |
| 172 | token = tkOTHER; |
| 173 | break; |
| 174 | } |
| 175 | while( *zSql && *zSql!='\n' ){ zSql++; } |
| 176 | if( *zSql==0 ) return state==1; |
| 177 | token = tkWS; |
| 178 | break; |
| 179 | } |
| 180 | case '[': { /* Microsoft-style identifiers in [...] */ |
| 181 | zSql++; |
| 182 | while( *zSql && *zSql!=']' ){ zSql++; } |
| 183 | if( *zSql==0 ) return 0; |
| 184 | token = tkOTHER; |
| 185 | break; |
| 186 | } |
| 187 | case '`': /* Grave-accent quoted symbols used by MySQL */ |
| 188 | case '"': /* single- and double-quoted strings */ |
| 189 | case '\'': { |
| 190 | int c = *zSql; |
| 191 | zSql++; |
| 192 | while( *zSql && *zSql!=c ){ zSql++; } |
| 193 | if( *zSql==0 ) return 0; |
| 194 | token = tkOTHER; |
| 195 | break; |
| 196 | } |
| 197 | default: { |
| 198 | #ifdef SQLITE_EBCDIC |
| 199 | unsigned char c; |
| 200 | #endif |
| 201 | if( IdChar((u8)*zSql) ){ |
| 202 | /* Keywords and unquoted identifiers */ |
| 203 | int nId; |
| 204 | for(nId=1; IdChar(zSql[nId]); nId++){} |
| 205 | #ifdef SQLITE_OMIT_TRIGGER |
| 206 | token = tkOTHER; |
| 207 | #else |
| 208 | switch( *zSql ){ |
| 209 | case 'c': case 'C': { |
| 210 | if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){ |
| 211 | token = tkCREATE; |
| 212 | }else{ |
| 213 | token = tkOTHER; |
| 214 | } |
| 215 | break; |
| 216 | } |
| 217 | case 't': case 'T': { |
| 218 | if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){ |
| 219 | token = tkTRIGGER; |
| 220 | }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){ |
| 221 | token = tkTEMP; |
| 222 | }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){ |
| 223 | token = tkTEMP; |
| 224 | }else{ |
| 225 | token = tkOTHER; |
| 226 | } |
| 227 | break; |
| 228 | } |
| 229 | case 'e': case 'E': { |
| 230 | if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){ |
| 231 | token = tkEND; |
| 232 | }else |
| 233 | #ifndef SQLITE_OMIT_EXPLAIN |
| 234 | if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){ |
| 235 | token = tkEXPLAIN; |
| 236 | }else |
| 237 | #endif |
| 238 | { |
| 239 | token = tkOTHER; |
| 240 | } |
| 241 | break; |
| 242 | } |
| 243 | default: { |
| 244 | token = tkOTHER; |
| 245 | break; |
| 246 | } |
| 247 | } |
| 248 | #endif /* SQLITE_OMIT_TRIGGER */ |
| 249 | zSql += nId-1; |
| 250 | }else{ |
| 251 | /* Operators and special symbols */ |
| 252 | token = tkOTHER; |
| 253 | } |
| 254 | break; |
| 255 | } |
| 256 | } |
| 257 | state = trans[state][token]; |
| 258 | zSql++; |
| 259 | } |
| 260 | return state==1; |
| 261 | } |
| 262 | |
| 263 | #ifndef SQLITE_OMIT_UTF16 |
| 264 | /* |
| 265 | ** This routine is the same as the sqlite3_complete() routine described |
| 266 | ** above, except that the parameter is required to be UTF-16 encoded, not |
| 267 | ** UTF-8. |
| 268 | */ |
| 269 | int sqlite3_complete16(const void *zSql){ |
| 270 | sqlite3_value *pVal; |
| 271 | char const *zSql8; |
drh | 50c7bb6 | 2015-04-09 11:24:03 +0000 | [diff] [blame] | 272 | int rc; |
drh | 9978c97 | 2010-02-23 17:36:32 +0000 | [diff] [blame] | 273 | |
| 274 | #ifndef SQLITE_OMIT_AUTOINIT |
| 275 | rc = sqlite3_initialize(); |
| 276 | if( rc ) return rc; |
| 277 | #endif |
| 278 | pVal = sqlite3ValueNew(0); |
| 279 | sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); |
| 280 | zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); |
| 281 | if( zSql8 ){ |
| 282 | rc = sqlite3_complete(zSql8); |
| 283 | }else{ |
mistachkin | fad3039 | 2016-02-13 23:43:46 +0000 | [diff] [blame] | 284 | rc = SQLITE_NOMEM_BKPT; |
drh | 9978c97 | 2010-02-23 17:36:32 +0000 | [diff] [blame] | 285 | } |
| 286 | sqlite3ValueFree(pVal); |
drh | 597d2b6 | 2015-06-30 03:13:47 +0000 | [diff] [blame] | 287 | return rc & 0xff; |
drh | 9978c97 | 2010-02-23 17:36:32 +0000 | [diff] [blame] | 288 | } |
| 289 | #endif /* SQLITE_OMIT_UTF16 */ |
#endif /* SQLITE_OMIT_COMPLETE */