drh | a2b902d | 2005-08-14 17:53:20 +0000 | [diff] [blame] | 1 | /* |
| 2 | ** 2001 September 15 |
| 3 | ** |
| 4 | ** The author disclaims copyright to this source code. In place of |
| 5 | ** a legal notice, here is a blessing: |
| 6 | ** |
| 7 | ** May you do good and not evil. |
| 8 | ** May you find forgiveness for yourself and forgive others. |
| 9 | ** May you share freely, never taking more than you give. |
| 10 | ** |
| 11 | ************************************************************************* |
| 12 | ** A tokenizer for SQL |
| 13 | ** |
| 14 | ** This file contains C code that implements the sqlite3_complete() API. |
| 15 | ** This code used to be part of the tokenizer.c source file. But by |
| 16 | ** separating it out, the code will be automatically omitted from |
| 17 | ** static links that do not use it. |
| 18 | ** |
danielk1977 | 54f0198 | 2006-01-18 15:25:17 +0000 | [diff] [blame^] | 19 | ** $Id: complete.c,v 1.3 2006/01/18 15:25:17 danielk1977 Exp $ |
drh | a2b902d | 2005-08-14 17:53:20 +0000 | [diff] [blame] | 20 | */ |
| 21 | #include "sqliteInt.h" |
| 22 | #ifndef SQLITE_OMIT_COMPLETE |
| 23 | |
| 24 | /* |
| 25 | ** This is defined in tokenize.c. We just have to import the definition. |
| 26 | */ |
| 27 | extern const char sqlite3IsIdChar[]; |
/*
** IdChar(C) tests whether C can be part of an unquoted identifier or
** keyword.  The macro stores its argument in a variable named "c", so an
** assignable "c" must be in scope wherever IdChar() is used.  Any value
** with the 0x80 bit set counts as an identifier character; otherwise
** values above 0x1f are looked up in sqlite3IsIdChar[], whose first entry
** corresponds to character 0x20.
*/
| 28 | #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20])) |
| 29 | |
| 30 | |
| 31 | /* |
| 32 | ** Token types used by the sqlite3_complete() routine. See the header |
| 33 | ** comments on that procedure for additional information. |
| 34 | */ |
/*
** Each value is used as a column index into the trans[][] table inside
** sqlite3_complete(), so the numbering must match the column order there.
** When SQLITE_OMIT_TRIGGER is defined that table has only three columns
** and the tokenizer produces only tkSEMI, tkWS and tkOTHER.
*/
| 35 | #define tkSEMI 0 |
| 36 | #define tkWS 1 |
| 37 | #define tkOTHER 2 |
| 38 | #define tkEXPLAIN 3 |
| 39 | #define tkCREATE 4 |
| 40 | #define tkTEMP 5 |
| 41 | #define tkTRIGGER 6 |
| 42 | #define tkEND 7 |
| 43 | |
| 44 | /* |
| 45 | ** Return TRUE if the given SQL string ends in a semicolon. |
| 46 | ** |
| 47 | ** Special handling is required for CREATE TRIGGER statements. |
| 48 | ** Whenever the CREATE TRIGGER keywords are seen, the statement |
| 49 | ** must end with ";END;". |
| 50 | ** |
| 51 | ** This implementation uses a state machine with 7 states: |
| 52 | ** |
| 53 | ** (0) START At the beginning or end of an SQL statement. This routine |
| 54 | ** returns 1 if it ends in the START state and 0 if it ends |
| 55 | ** in any other state. |
| 56 | ** |
| 57 | ** (1) NORMAL We are in the middle of a statement which ends with a single |
| 58 | ** semicolon. |
| 59 | ** |
| 60 | ** (2) EXPLAIN The keyword EXPLAIN has been seen at the beginning of |
| 61 | ** a statement. |
| 62 | ** |
| 63 | ** (3) CREATE The keyword CREATE has been seen at the beginning of a |
| 64 | ** statement, possibly preceded by EXPLAIN and/or followed by |
| 65 | ** TEMP or TEMPORARY |
| 66 | ** |
| 67 | ** (4) TRIGGER We are in the middle of a trigger definition that must be |
| 68 | ** ended by a semicolon, the keyword END, and another semicolon. |
| 69 | ** |
| 70 | ** (5) SEMI We've seen the first semicolon in the ";END;" that occurs at |
| 71 | ** the end of a trigger definition. |
| 72 | ** |
| 73 | ** (6) END We've seen the ";END" of the ";END;" that occurs at the end |
| 74 | ** of a trigger definition. |
| 75 | ** |
| 76 | ** Transitions between states above are determined by tokens extracted |
| 77 | ** from the input. The following tokens are significant: |
| 78 | ** |
| 79 | ** (0) tkSEMI A semicolon. |
| 80 | ** (1) tkWS Whitespace |
| 81 | ** (2) tkOTHER Any other SQL token. |
| 82 | ** (3) tkEXPLAIN The "explain" keyword. |
| 83 | ** (4) tkCREATE The "create" keyword. |
| 84 | ** (5) tkTEMP The "temp" or "temporary" keyword. |
| 85 | ** (6) tkTRIGGER The "trigger" keyword. |
| 86 | ** (7) tkEND The "end" keyword. |
| 87 | ** |
| 88 | ** Whitespace never causes a state transition and is always ignored. |
| 89 | ** |
| 90 | ** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed |
| 91 | ** to recognize the end of a trigger can be omitted. All we have to do |
| 92 | ** is look for a semicolon that is not part of a string or comment. |
| 93 | */ |
| 94 | int sqlite3_complete(const char *zSql){ |
| 95 | u8 state = 0; /* Current state, using numbers defined in header comment */ |
| 96 | u8 token; /* Value of the next token */ |
| 97 | |
| 98 | #ifndef SQLITE_OMIT_TRIGGER |
| 99 | /* A complex statement machine used to detect the end of a CREATE TRIGGER |
| 100 | ** statement. This is the normal case. |
| 101 | */ |
| 102 | static const u8 trans[7][8] = { |
| 103 | /* Token: */ |
| 104 | /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */ |
| 105 | /* 0 START: */ { 0, 0, 1, 2, 3, 1, 1, 1, }, |
| 106 | /* 1 NORMAL: */ { 0, 1, 1, 1, 1, 1, 1, 1, }, |
| 107 | /* 2 EXPLAIN: */ { 0, 2, 1, 1, 3, 1, 1, 1, }, |
| 108 | /* 3 CREATE: */ { 0, 3, 1, 1, 1, 3, 4, 1, }, |
| 109 | /* 4 TRIGGER: */ { 5, 4, 4, 4, 4, 4, 4, 4, }, |
| 110 | /* 5 SEMI: */ { 5, 5, 4, 4, 4, 4, 4, 6, }, |
| 111 | /* 6 END: */ { 0, 6, 4, 4, 4, 4, 4, 4, }, |
| 112 | }; |
| 113 | #else |
| 114 | /* If triggers are not suppored by this compile then the statement machine |
| 115 | ** used to detect the end of a statement is much simplier |
| 116 | */ |
| 117 | static const u8 trans[2][3] = { |
| 118 | /* Token: */ |
| 119 | /* State: ** SEMI WS OTHER */ |
| 120 | /* 0 START: */ { 0, 0, 1, }, |
| 121 | /* 1 NORMAL: */ { 0, 1, 1, }, |
| 122 | }; |
| 123 | #endif /* SQLITE_OMIT_TRIGGER */ |
| 124 | |
| 125 | while( *zSql ){ |
| 126 | switch( *zSql ){ |
| 127 | case ';': { /* A semicolon */ |
| 128 | token = tkSEMI; |
| 129 | break; |
| 130 | } |
| 131 | case ' ': |
| 132 | case '\r': |
| 133 | case '\t': |
| 134 | case '\n': |
| 135 | case '\f': { /* White space is ignored */ |
| 136 | token = tkWS; |
| 137 | break; |
| 138 | } |
| 139 | case '/': { /* C-style comments */ |
| 140 | if( zSql[1]!='*' ){ |
| 141 | token = tkOTHER; |
| 142 | break; |
| 143 | } |
| 144 | zSql += 2; |
| 145 | while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } |
| 146 | if( zSql[0]==0 ) return 0; |
| 147 | zSql++; |
| 148 | token = tkWS; |
| 149 | break; |
| 150 | } |
| 151 | case '-': { /* SQL-style comments from "--" to end of line */ |
| 152 | if( zSql[1]!='-' ){ |
| 153 | token = tkOTHER; |
| 154 | break; |
| 155 | } |
| 156 | while( *zSql && *zSql!='\n' ){ zSql++; } |
| 157 | if( *zSql==0 ) return state==0; |
| 158 | token = tkWS; |
| 159 | break; |
| 160 | } |
| 161 | case '[': { /* Microsoft-style identifiers in [...] */ |
| 162 | zSql++; |
| 163 | while( *zSql && *zSql!=']' ){ zSql++; } |
| 164 | if( *zSql==0 ) return 0; |
| 165 | token = tkOTHER; |
| 166 | break; |
| 167 | } |
| 168 | case '`': /* Grave-accent quoted symbols used by MySQL */ |
| 169 | case '"': /* single- and double-quoted strings */ |
| 170 | case '\'': { |
| 171 | int c = *zSql; |
| 172 | zSql++; |
| 173 | while( *zSql && *zSql!=c ){ zSql++; } |
| 174 | if( *zSql==0 ) return 0; |
| 175 | token = tkOTHER; |
| 176 | break; |
| 177 | } |
| 178 | default: { |
| 179 | int c; |
| 180 | if( IdChar((u8)*zSql) ){ |
| 181 | /* Keywords and unquoted identifiers */ |
| 182 | int nId; |
| 183 | for(nId=1; IdChar(zSql[nId]); nId++){} |
| 184 | #ifdef SQLITE_OMIT_TRIGGER |
| 185 | token = tkOTHER; |
| 186 | #else |
| 187 | switch( *zSql ){ |
| 188 | case 'c': case 'C': { |
| 189 | if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){ |
| 190 | token = tkCREATE; |
| 191 | }else{ |
| 192 | token = tkOTHER; |
| 193 | } |
| 194 | break; |
| 195 | } |
| 196 | case 't': case 'T': { |
| 197 | if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){ |
| 198 | token = tkTRIGGER; |
| 199 | }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){ |
| 200 | token = tkTEMP; |
| 201 | }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){ |
| 202 | token = tkTEMP; |
| 203 | }else{ |
| 204 | token = tkOTHER; |
| 205 | } |
| 206 | break; |
| 207 | } |
| 208 | case 'e': case 'E': { |
| 209 | if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){ |
| 210 | token = tkEND; |
| 211 | }else |
| 212 | #ifndef SQLITE_OMIT_EXPLAIN |
| 213 | if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){ |
| 214 | token = tkEXPLAIN; |
| 215 | }else |
| 216 | #endif |
| 217 | { |
| 218 | token = tkOTHER; |
| 219 | } |
| 220 | break; |
| 221 | } |
| 222 | default: { |
| 223 | token = tkOTHER; |
| 224 | break; |
| 225 | } |
| 226 | } |
| 227 | #endif /* SQLITE_OMIT_TRIGGER */ |
| 228 | zSql += nId-1; |
| 229 | }else{ |
| 230 | /* Operators and special symbols */ |
| 231 | token = tkOTHER; |
| 232 | } |
| 233 | break; |
| 234 | } |
| 235 | } |
| 236 | state = trans[state][token]; |
| 237 | zSql++; |
| 238 | } |
| 239 | return state==0; |
| 240 | } |
| 241 | |
| 242 | #ifndef SQLITE_OMIT_UTF16 |
| 243 | /* |
| 244 | ** This routine is the same as the sqlite3_complete() routine described |
| 245 | ** above, except that the parameter is required to be UTF-16 encoded, not |
| 246 | ** UTF-8. |
| 247 | */ |
| 248 | int sqlite3_complete16(const void *zSql){ |
| 249 | sqlite3_value *pVal; |
| 250 | char const *zSql8; |
| 251 | int rc = 0; |
| 252 | |
| 253 | pVal = sqlite3ValueNew(); |
| 254 | sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); |
| 255 | zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); |
| 256 | if( zSql8 ){ |
| 257 | rc = sqlite3_complete(zSql8); |
drh | a2b902d | 2005-08-14 17:53:20 +0000 | [diff] [blame] | 258 | } |
| 259 | sqlite3ValueFree(pVal); |
danielk1977 | 54f0198 | 2006-01-18 15:25:17 +0000 | [diff] [blame^] | 260 | return sqlite3ApiExit(0, rc); |
drh | a2b902d | 2005-08-14 17:53:20 +0000 | [diff] [blame] | 261 | } |
| 262 | #endif /* SQLITE_OMIT_UTF16 */ |
| 263 | #endif /* SQLITE_OMIT_COMPLETE */ |