drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 1 | /* |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 2 | ** 2001 September 15 |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 3 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 4 | ** The author disclaims copyright to this source code. In place of |
| 5 | ** a legal notice, here is a blessing: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 6 | ** |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 7 | ** May you do good and not evil. |
| 8 | ** May you find forgiveness for yourself and forgive others. |
| 9 | ** May you share freely, never taking more than you give. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 10 | ** |
| 11 | ************************************************************************* |
| 12 | ** A tokenizer for SQL |
| 13 | ** |
| 14 | ** This file contains C code that splits an SQL input string up into |
| 15 | ** individual tokens and sends those tokens one-by-one over to the |
| 16 | ** parser for analysis. |
| 17 | ** |
drh | eab7f3f | 2007-05-15 09:00:14 +0000 | [diff] [blame] | 18 | ** $Id: tokenize.c,v 1.128 2007/05/15 09:00:15 drh Exp $ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 19 | */ |
| 20 | #include "sqliteInt.h" |
drh | ad75e98 | 2001-10-09 04:19:46 +0000 | [diff] [blame] | 21 | #include "os.h" |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 22 | #include <ctype.h> |
drh | dcc581c | 2000-05-30 13:44:19 +0000 | [diff] [blame] | 23 | #include <stdlib.h> |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 24 | |
/*
** The charMap() macro maps alphabetic characters into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  Only alphabetic characters and underscores
** need to be translated.
**
** The macro argument is parenthesized before it is cast so that an
** arbitrary expression (for example charMap(a+b)) is converted as a
** whole rather than having the cast bind to its first operand only.
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
/*
** Translation table indexed by EBCDIC code point.  Entries for upper and
** lower case letters hold the ASCII code of the lower-case letter, the
** entry at 0x6D holds 95 (ASCII '_'), and every other entry is 0.
*/
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
| 57 | |
| 58 | /* |
drh | 52fb6d7 | 2004-11-03 03:59:57 +0000 | [diff] [blame] | 59 | ** The sqlite3KeywordCode function looks up an identifier to determine if |
| 60 | ** it is a keyword. If it is a keyword, the token code of that keyword is |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 61 | ** returned. If the input is not a keyword, TK_ID is returned. |
drh | 2090a0e | 2004-10-07 19:03:01 +0000 | [diff] [blame] | 62 | ** |
| 63 | ** The implementation of this routine was generated by a program, |
drh | 73b211a | 2005-01-18 04:00:42 +0000 | [diff] [blame] | 64 | ** mkkeywordhash.c, located in the tool subdirectory of the distribution. |
drh | 52fb6d7 | 2004-11-03 03:59:57 +0000 | [diff] [blame] | 65 | ** The output of the mkkeywordhash.c program is written into a file |
drh | 73b211a | 2005-01-18 04:00:42 +0000 | [diff] [blame] | 66 | ** named keywordhash.h and then included into this source file by |
drh | 52fb6d7 | 2004-11-03 03:59:57 +0000 | [diff] [blame] | 67 | ** the #include below. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 68 | */ |
drh | 73b211a | 2005-01-18 04:00:42 +0000 | [diff] [blame] | 69 | #include "keywordhash.h" |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 70 | |
drh | 40f20f7 | 2004-10-23 05:10:18 +0000 | [diff] [blame] | 71 | |
/*
** IdChar(X) is true when X is a character that may appear in an
** identifier and false otherwise.
**
** ASCII builds: every character with the high-order bit set is an
** identifier character.  A 7-bit character qualifies when it is at
** least 0x20 and its entry in sqlite3IsIdChar[], which is indexed
** from 0x20, is non-zero.
**
** EBCDIC builds: the table is indexed from 0x40 and only characters
** at or above 0x42 are looked up.  The rules are more involved but the
** end result is the same.
**
** Ticket #1066.  The SQL standard does not allow '$' in the middle of
** identifiers, but many SQL implementations do.  SQLite allows '$' in
** identifiers for compatibility, although the feature is undocumented.
**
** NOTE: both forms of IdChar() assign their argument to a variable
** named "c", which must be declared in the scope where the macro is
** expanded.
*/
#ifdef SQLITE_ASCII
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define IdChar(C) \
  ((((c=(C))&0x80)!=0) || (c>=0x20 && sqlite3IsIdChar[c-0x20]!=0))
#endif
#ifdef SQLITE_EBCDIC
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
};
#define IdChar(C) \
  ((c=(C))>0x41 && sqlite3IsIdChar[c-0x40]!=0)
#endif
| 118 | |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 119 | |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 120 | /* |
drh | 61b487d | 2003-09-12 02:08:14 +0000 | [diff] [blame] | 121 | ** Return the length of the token that begins at z[0]. |
| 122 | ** Store the token type in *tokenType before returning. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 123 | */ |
danielk1977 | c60e9b8 | 2005-01-31 12:42:29 +0000 | [diff] [blame] | 124 | static int getToken(const unsigned char *z, int *tokenType){ |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 125 | int i, c; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 126 | switch( *z ){ |
drh | 30cab80 | 2000-08-09 17:17:25 +0000 | [diff] [blame] | 127 | case ' ': case '\t': case '\n': case '\f': case '\r': { |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 128 | for(i=1; isspace(z[i]); i++){} |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 129 | *tokenType = TK_SPACE; |
| 130 | return i; |
| 131 | } |
| 132 | case '-': { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 133 | if( z[1]=='-' ){ |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 134 | for(i=2; (c=z[i])!=0 && c!='\n'; i++){} |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 135 | *tokenType = TK_COMMENT; |
| 136 | return i; |
| 137 | } |
| 138 | *tokenType = TK_MINUS; |
| 139 | return 1; |
| 140 | } |
| 141 | case '(': { |
drh | dab3518 | 2003-09-27 13:39:38 +0000 | [diff] [blame] | 142 | *tokenType = TK_LP; |
| 143 | return 1; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 144 | } |
| 145 | case ')': { |
| 146 | *tokenType = TK_RP; |
| 147 | return 1; |
| 148 | } |
| 149 | case ';': { |
| 150 | *tokenType = TK_SEMI; |
| 151 | return 1; |
| 152 | } |
| 153 | case '+': { |
| 154 | *tokenType = TK_PLUS; |
| 155 | return 1; |
| 156 | } |
| 157 | case '*': { |
| 158 | *tokenType = TK_STAR; |
| 159 | return 1; |
| 160 | } |
| 161 | case '/': { |
drh | 66105a8 | 2002-08-27 14:28:29 +0000 | [diff] [blame] | 162 | if( z[1]!='*' || z[2]==0 ){ |
| 163 | *tokenType = TK_SLASH; |
| 164 | return 1; |
| 165 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 166 | for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} |
| 167 | if( c ) i++; |
drh | 66105a8 | 2002-08-27 14:28:29 +0000 | [diff] [blame] | 168 | *tokenType = TK_COMMENT; |
| 169 | return i; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 170 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 171 | case '%': { |
| 172 | *tokenType = TK_REM; |
| 173 | return 1; |
| 174 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 175 | case '=': { |
| 176 | *tokenType = TK_EQ; |
| 177 | return 1 + (z[1]=='='); |
| 178 | } |
| 179 | case '<': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 180 | if( (c=z[1])=='=' ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 181 | *tokenType = TK_LE; |
| 182 | return 2; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 183 | }else if( c=='>' ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 184 | *tokenType = TK_NE; |
| 185 | return 2; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 186 | }else if( c=='<' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 187 | *tokenType = TK_LSHIFT; |
| 188 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 189 | }else{ |
| 190 | *tokenType = TK_LT; |
| 191 | return 1; |
| 192 | } |
| 193 | } |
| 194 | case '>': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 195 | if( (c=z[1])=='=' ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 196 | *tokenType = TK_GE; |
| 197 | return 2; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 198 | }else if( c=='>' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 199 | *tokenType = TK_RSHIFT; |
| 200 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 201 | }else{ |
| 202 | *tokenType = TK_GT; |
| 203 | return 1; |
| 204 | } |
| 205 | } |
| 206 | case '!': { |
| 207 | if( z[1]!='=' ){ |
| 208 | *tokenType = TK_ILLEGAL; |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 209 | return 2; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 210 | }else{ |
| 211 | *tokenType = TK_NE; |
| 212 | return 2; |
| 213 | } |
| 214 | } |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 215 | case '|': { |
| 216 | if( z[1]!='|' ){ |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 217 | *tokenType = TK_BITOR; |
drh | 0040077 | 2000-06-16 20:51:26 +0000 | [diff] [blame] | 218 | return 1; |
| 219 | }else{ |
| 220 | *tokenType = TK_CONCAT; |
| 221 | return 2; |
| 222 | } |
| 223 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 224 | case ',': { |
| 225 | *tokenType = TK_COMMA; |
| 226 | return 1; |
| 227 | } |
drh | bf4133c | 2001-10-13 02:59:08 +0000 | [diff] [blame] | 228 | case '&': { |
| 229 | *tokenType = TK_BITAND; |
| 230 | return 1; |
| 231 | } |
| 232 | case '~': { |
| 233 | *tokenType = TK_BITNOT; |
| 234 | return 1; |
| 235 | } |
drh | 3d94662 | 2005-08-13 18:15:42 +0000 | [diff] [blame] | 236 | case '`': |
| 237 | case '\'': |
| 238 | case '"': { |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 239 | int delim = z[0]; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 240 | for(i=1; (c=z[i])!=0; i++){ |
| 241 | if( c==delim ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 242 | if( z[i+1]==delim ){ |
| 243 | i++; |
| 244 | }else{ |
| 245 | break; |
| 246 | } |
| 247 | } |
| 248 | } |
drh | eef8b55 | 2005-10-23 11:29:40 +0000 | [diff] [blame] | 249 | if( c ){ |
| 250 | *tokenType = TK_STRING; |
| 251 | return i+1; |
| 252 | }else{ |
| 253 | *tokenType = TK_ILLEGAL; |
| 254 | return i; |
| 255 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 256 | } |
| 257 | case '.': { |
drh | 7681618 | 2005-08-23 11:31:26 +0000 | [diff] [blame] | 258 | #ifndef SQLITE_OMIT_FLOATING_POINT |
| 259 | if( !isdigit(z[1]) ) |
| 260 | #endif |
| 261 | { |
| 262 | *tokenType = TK_DOT; |
| 263 | return 1; |
| 264 | } |
| 265 | /* If the next character is a digit, this is a floating point |
| 266 | ** number that begins with ".". Fall thru into the next case */ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 267 | } |
| 268 | case '0': case '1': case '2': case '3': case '4': |
| 269 | case '5': case '6': case '7': case '8': case '9': { |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 270 | *tokenType = TK_INTEGER; |
drh | 7681618 | 2005-08-23 11:31:26 +0000 | [diff] [blame] | 271 | for(i=0; isdigit(z[i]); i++){} |
drh | b7f9164 | 2004-10-31 02:22:47 +0000 | [diff] [blame] | 272 | #ifndef SQLITE_OMIT_FLOATING_POINT |
drh | 7681618 | 2005-08-23 11:31:26 +0000 | [diff] [blame] | 273 | if( z[i]=='.' ){ |
| 274 | i++; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 275 | while( isdigit(z[i]) ){ i++; } |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 276 | *tokenType = TK_FLOAT; |
| 277 | } |
| 278 | if( (z[i]=='e' || z[i]=='E') && |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 279 | ( isdigit(z[i+1]) |
| 280 | || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) |
| 281 | ) |
drh | c837e70 | 2000-06-08 16:26:24 +0000 | [diff] [blame] | 282 | ){ |
| 283 | i += 2; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 284 | while( isdigit(z[i]) ){ i++; } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 285 | *tokenType = TK_FLOAT; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 286 | } |
drh | b7f9164 | 2004-10-31 02:22:47 +0000 | [diff] [blame] | 287 | #endif |
drh | 67dd901 | 2006-08-12 12:33:14 +0000 | [diff] [blame] | 288 | while( IdChar(z[i]) ){ |
| 289 | *tokenType = TK_ILLEGAL; |
| 290 | i++; |
| 291 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 292 | return i; |
| 293 | } |
drh | 2f4392f | 2002-02-14 21:42:51 +0000 | [diff] [blame] | 294 | case '[': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 295 | for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} |
drh | 2f4392f | 2002-02-14 21:42:51 +0000 | [diff] [blame] | 296 | *tokenType = TK_ID; |
| 297 | return i; |
| 298 | } |
drh | 7c972de | 2003-09-06 22:18:07 +0000 | [diff] [blame] | 299 | case '?': { |
drh | 5045789 | 2003-09-06 01:10:47 +0000 | [diff] [blame] | 300 | *tokenType = TK_VARIABLE; |
drh | fa6bc00 | 2004-09-07 16:19:52 +0000 | [diff] [blame] | 301 | for(i=1; isdigit(z[i]); i++){} |
| 302 | return i; |
drh | 5045789 | 2003-09-06 01:10:47 +0000 | [diff] [blame] | 303 | } |
drh | 288d37f | 2005-06-22 08:48:06 +0000 | [diff] [blame] | 304 | case '#': { |
| 305 | for(i=1; isdigit(z[i]); i++){} |
| 306 | if( i>1 ){ |
| 307 | /* Parameters of the form #NNN (where NNN is a number) are used |
| 308 | ** internally by sqlite3NestedParse. */ |
| 309 | *tokenType = TK_REGISTER; |
| 310 | return i; |
| 311 | } |
| 312 | /* Fall through into the next case if the '#' is not followed by |
| 313 | ** a digit. Try to match #AAAA where AAAA is a parameter name. */ |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 314 | } |
drh | b7f9164 | 2004-10-31 02:22:47 +0000 | [diff] [blame] | 315 | #ifndef SQLITE_OMIT_TCL_VARIABLE |
drh | 288d37f | 2005-06-22 08:48:06 +0000 | [diff] [blame] | 316 | case '$': |
| 317 | #endif |
drh | 0b2a5ee | 2006-02-09 22:24:41 +0000 | [diff] [blame] | 318 | case '@': /* For compatibility with MS SQL Server */ |
drh | 288d37f | 2005-06-22 08:48:06 +0000 | [diff] [blame] | 319 | case ':': { |
| 320 | int n = 0; |
drh | 9d74b4c | 2004-08-24 15:23:34 +0000 | [diff] [blame] | 321 | *tokenType = TK_VARIABLE; |
drh | 288d37f | 2005-06-22 08:48:06 +0000 | [diff] [blame] | 322 | for(i=1; (c=z[i])!=0; i++){ |
| 323 | if( IdChar(c) ){ |
| 324 | n++; |
| 325 | #ifndef SQLITE_OMIT_TCL_VARIABLE |
| 326 | }else if( c=='(' && n>0 ){ |
| 327 | do{ |
| 328 | i++; |
| 329 | }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); |
| 330 | if( c==')' ){ |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 331 | i++; |
| 332 | }else{ |
drh | 288d37f | 2005-06-22 08:48:06 +0000 | [diff] [blame] | 333 | *tokenType = TK_ILLEGAL; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 334 | } |
drh | 288d37f | 2005-06-22 08:48:06 +0000 | [diff] [blame] | 335 | break; |
| 336 | }else if( c==':' && z[i+1]==':' ){ |
| 337 | i++; |
| 338 | #endif |
| 339 | }else{ |
| 340 | break; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 341 | } |
| 342 | } |
drh | 288d37f | 2005-06-22 08:48:06 +0000 | [diff] [blame] | 343 | if( n==0 ) *tokenType = TK_ILLEGAL; |
drh | 895d747 | 2004-08-20 16:02:39 +0000 | [diff] [blame] | 344 | return i; |
drh | b7f9164 | 2004-10-31 02:22:47 +0000 | [diff] [blame] | 345 | } |
drh | b7f9164 | 2004-10-31 02:22:47 +0000 | [diff] [blame] | 346 | #ifndef SQLITE_OMIT_BLOB_LITERAL |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 347 | case 'x': case 'X': { |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 348 | if( (c=z[1])=='\'' || c=='"' ){ |
| 349 | int delim = c; |
danielk1977 | 3fd0a73 | 2004-05-27 13:35:19 +0000 | [diff] [blame] | 350 | *tokenType = TK_BLOB; |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 351 | for(i=2; (c=z[i])!=0; i++){ |
| 352 | if( c==delim ){ |
danielk1977 | 3fd0a73 | 2004-05-27 13:35:19 +0000 | [diff] [blame] | 353 | if( i%2 ) *tokenType = TK_ILLEGAL; |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 354 | break; |
| 355 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 356 | if( !isxdigit(c) ){ |
danielk1977 | 3fd0a73 | 2004-05-27 13:35:19 +0000 | [diff] [blame] | 357 | *tokenType = TK_ILLEGAL; |
| 358 | return i; |
| 359 | } |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 360 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 361 | if( c ) i++; |
danielk1977 | c572ef7 | 2004-05-27 09:28:41 +0000 | [diff] [blame] | 362 | return i; |
| 363 | } |
| 364 | /* Otherwise fall through to the next case */ |
| 365 | } |
drh | b7f9164 | 2004-10-31 02:22:47 +0000 | [diff] [blame] | 366 | #endif |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 367 | default: { |
drh | eab7f3f | 2007-05-15 09:00:14 +0000 | [diff] [blame] | 368 | if( !IdChar(*z) || (*z & 0xc0)==0x80 ){ |
drh | 98808ba | 2001-10-18 12:34:46 +0000 | [diff] [blame] | 369 | break; |
| 370 | } |
drh | aa756b0 | 2004-09-25 15:25:26 +0000 | [diff] [blame] | 371 | for(i=1; IdChar(z[i]); i++){} |
danielk1977 | c60e9b8 | 2005-01-31 12:42:29 +0000 | [diff] [blame] | 372 | *tokenType = keywordCode((char*)z, i); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 373 | return i; |
| 374 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 375 | } |
| 376 | *tokenType = TK_ILLEGAL; |
| 377 | return 1; |
| 378 | } |
/*
** Externally visible wrapper around the file-local getToken() routine.
** The arguments are forwarded unchanged and the token length computed
** by getToken() is returned; *tokenType is filled in by getToken().
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  int nByte = getToken(z, tokenType);
  return nByte;
}
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 382 | |
| 383 | /* |
| 384 | ** Run the parser on the given SQL string. The parser structure is |
drh | b19a2bc | 2001-09-16 00:13:26 +0000 | [diff] [blame] | 385 | ** passed in. An SQLITE_ status code is returned. If an error occurs |
| 386 | ** and pzErrMsg!=NULL then an error message might be written into |
| 387 | ** memory obtained from malloc() and *pzErrMsg made to point to that |
| 388 | ** error message. Or maybe not. |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 389 | */ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 390 | int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 391 | int nErr = 0; |
| 392 | int i; |
| 393 | void *pEngine; |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 394 | int tokenType; |
| 395 | int lastTokenParsed = -1; |
drh | 9bb575f | 2004-09-06 17:24:11 +0000 | [diff] [blame] | 396 | sqlite3 *db = pParse->db; |
drh | 23a4d14 | 2007-01-26 19:31:00 +0000 | [diff] [blame] | 397 | extern void *sqlite3ParserAlloc(void*(*)(size_t)); |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 398 | extern void sqlite3ParserFree(void*, void(*)(void*)); |
drh | 23a4d14 | 2007-01-26 19:31:00 +0000 | [diff] [blame] | 399 | extern void sqlite3Parser(void*, int, Token, Parse*); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 400 | |
drh | 15ca1df | 2006-07-26 13:43:30 +0000 | [diff] [blame] | 401 | if( db->activeVdbeCnt==0 ){ |
| 402 | db->u1.isInterrupted = 0; |
| 403 | } |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 404 | pParse->rc = SQLITE_OK; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 405 | i = 0; |
drh | 23a4d14 | 2007-01-26 19:31:00 +0000 | [diff] [blame] | 406 | pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3MallocX); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 407 | if( pEngine==0 ){ |
drh | defc997 | 2005-06-06 14:45:42 +0000 | [diff] [blame] | 408 | return SQLITE_NOMEM; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 409 | } |
drh | fa6bc00 | 2004-09-07 16:19:52 +0000 | [diff] [blame] | 410 | assert( pParse->sLastToken.dyn==0 ); |
| 411 | assert( pParse->pNewTable==0 ); |
| 412 | assert( pParse->pNewTrigger==0 ); |
| 413 | assert( pParse->nVar==0 ); |
| 414 | assert( pParse->nVarExpr==0 ); |
| 415 | assert( pParse->nVarExprAlloc==0 ); |
| 416 | assert( pParse->apVarExpr==0 ); |
drh | 3f7d4e4 | 2004-07-24 14:35:58 +0000 | [diff] [blame] | 417 | pParse->zTail = pParse->zSql = zSql; |
danielk1977 | 9e12800 | 2006-01-18 16:51:35 +0000 | [diff] [blame] | 418 | while( !sqlite3MallocFailed() && zSql[i]!=0 ){ |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 419 | assert( i>=0 ); |
drh | 2646da7 | 2005-12-09 20:02:05 +0000 | [diff] [blame] | 420 | pParse->sLastToken.z = (u8*)&zSql[i]; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 421 | assert( pParse->sLastToken.dyn==0 ); |
danielk1977 | c60e9b8 | 2005-01-31 12:42:29 +0000 | [diff] [blame] | 422 | pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 423 | i += pParse->sLastToken.n; |
drh | e5c941b | 2007-05-08 13:58:26 +0000 | [diff] [blame] | 424 | if( i>SQLITE_MAX_SQL_LENGTH ){ |
| 425 | pParse->rc = SQLITE_TOOBIG; |
| 426 | break; |
| 427 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 428 | switch( tokenType ){ |
| 429 | case TK_SPACE: |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 430 | case TK_COMMENT: { |
drh | 881feaa | 2006-07-26 01:39:30 +0000 | [diff] [blame] | 431 | if( db->u1.isInterrupted ){ |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 432 | pParse->rc = SQLITE_INTERRUPT; |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 433 | sqlite3SetString(pzErrMsg, "interrupt", (char*)0); |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 434 | goto abort_parse; |
| 435 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 436 | break; |
| 437 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 438 | case TK_ILLEGAL: { |
drh | ae29ffb | 2004-09-25 14:39:18 +0000 | [diff] [blame] | 439 | if( pzErrMsg ){ |
| 440 | sqliteFree(*pzErrMsg); |
| 441 | *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"", |
| 442 | &pParse->sLastToken); |
| 443 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 444 | nErr++; |
drh | caec2f1 | 2003-01-07 02:47:47 +0000 | [diff] [blame] | 445 | goto abort_parse; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 446 | } |
drh | 326dce7 | 2003-01-29 14:06:07 +0000 | [diff] [blame] | 447 | case TK_SEMI: { |
| 448 | pParse->zTail = &zSql[i]; |
| 449 | /* Fall thru into the default case */ |
| 450 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 451 | default: { |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 452 | sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 453 | lastTokenParsed = tokenType; |
| 454 | if( pParse->rc!=SQLITE_OK ){ |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 455 | goto abort_parse; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 456 | } |
| 457 | break; |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 458 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 459 | } |
| 460 | } |
drh | 32eb7b4 | 2003-01-07 01:44:37 +0000 | [diff] [blame] | 461 | abort_parse: |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 462 | if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ |
| 463 | if( lastTokenParsed!=TK_SEMI ){ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 464 | sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); |
drh | 326dce7 | 2003-01-29 14:06:07 +0000 | [diff] [blame] | 465 | pParse->zTail = &zSql[i]; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 466 | } |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 467 | sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 468 | } |
drh | 132d8d6 | 2005-05-22 20:12:37 +0000 | [diff] [blame] | 469 | sqlite3ParserFree(pEngine, sqlite3FreeX); |
danielk1977 | 9e12800 | 2006-01-18 16:51:35 +0000 | [diff] [blame] | 470 | if( sqlite3MallocFailed() ){ |
drh | 71c697e | 2004-08-08 23:39:19 +0000 | [diff] [blame] | 471 | pParse->rc = SQLITE_NOMEM; |
| 472 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 473 | if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ |
danielk1977 | 261919c | 2005-12-06 12:52:59 +0000 | [diff] [blame] | 474 | sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 475 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 476 | if( pParse->zErrMsg ){ |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 477 | if( pzErrMsg && *pzErrMsg==0 ){ |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 478 | *pzErrMsg = pParse->zErrMsg; |
| 479 | }else{ |
| 480 | sqliteFree(pParse->zErrMsg); |
| 481 | } |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 482 | pParse->zErrMsg = 0; |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 483 | if( !nErr ) nErr++; |
| 484 | } |
drh | 2958a4e | 2004-11-12 03:56:15 +0000 | [diff] [blame] | 485 | if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ |
danielk1977 | 4adee20 | 2004-05-08 08:23:19 +0000 | [diff] [blame] | 486 | sqlite3VdbeDelete(pParse->pVdbe); |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 487 | pParse->pVdbe = 0; |
| 488 | } |
danielk1977 | c00da10 | 2006-01-07 13:21:04 +0000 | [diff] [blame] | 489 | #ifndef SQLITE_OMIT_SHARED_CACHE |
| 490 | if( pParse->nested==0 ){ |
| 491 | sqliteFree(pParse->aTableLock); |
| 492 | pParse->aTableLock = 0; |
| 493 | pParse->nTableLock = 0; |
| 494 | } |
| 495 | #endif |
danielk1977 | 7e6ebfb | 2006-06-12 11:24:37 +0000 | [diff] [blame] | 496 | |
danielk1977 | 7e6ebfb | 2006-06-12 11:24:37 +0000 | [diff] [blame] | 497 | if( !IN_DECLARE_VTAB ){ |
| 498 | /* If the pParse->declareVtab flag is set, do not delete any table |
| 499 | ** structure built up in pParse->pNewTable. The calling code (see vtab.c) |
| 500 | ** will take responsibility for freeing the Table structure. |
| 501 | */ |
danielk1977 | a04a34f | 2007-04-16 15:06:25 +0000 | [diff] [blame] | 502 | sqlite3DeleteTable(pParse->pNewTable); |
danielk1977 | 7e6ebfb | 2006-06-12 11:24:37 +0000 | [diff] [blame] | 503 | } |
| 504 | |
drh | fa6bc00 | 2004-09-07 16:19:52 +0000 | [diff] [blame] | 505 | sqlite3DeleteTrigger(pParse->pNewTrigger); |
| 506 | sqliteFree(pParse->apVarExpr); |
drh | b86ccfb | 2003-01-28 23:13:10 +0000 | [diff] [blame] | 507 | if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ |
drh | 4c50439 | 2000-10-16 22:06:40 +0000 | [diff] [blame] | 508 | pParse->rc = SQLITE_ERROR; |
| 509 | } |
drh | 7589723 | 2000-05-29 14:26:00 +0000 | [diff] [blame] | 510 | return nErr; |
| 511 | } |