drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 1 | /* |
| 2 | ** 2016-05-28 |
| 3 | ** |
| 4 | ** The author disclaims copyright to this source code. In place of |
| 5 | ** a legal notice, here is a blessing: |
| 6 | ** |
| 7 | ** May you do good and not evil. |
| 8 | ** May you find forgiveness for yourself and forgive others. |
| 9 | ** May you share freely, never taking more than you give. |
| 10 | ** |
| 11 | ****************************************************************************** |
| 12 | ** |
| 13 | ** This file contains the implementation of an SQLite virtual table for |
| 14 | ** reading CSV files. |
| 15 | ** |
| 16 | ** Usage: |
| 17 | ** |
| 18 | ** .load ./csv |
| 19 | ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); |
| 20 | ** SELECT * FROM csv; |
| 21 | ** |
 | 22 | ** The columns are named "c0", "c1", "c2", ... by default. But the |
| 23 | ** application can define its own CREATE TABLE statement as an additional |
| 24 | ** parameter. For example: |
| 25 | ** |
| 26 | ** CREATE VIRTUAL TABLE temp.csv2 USING csv( |
| 27 | ** filename = "../http.log", |
| 28 | ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" |
| 29 | ** ); |
| 30 | */ |
| 31 | #include <sqlite3ext.h> |
| 32 | SQLITE_EXTENSION_INIT1 |
| 33 | #include <string.h> |
| 34 | #include <stdlib.h> |
| 35 | #include <assert.h> |
| 36 | #include <stdarg.h> |
| 37 | #include <ctype.h> |
| 38 | #include <stdio.h> |
| 39 | |
/*
** CSV_NOINLINE is placed in front of a function definition to ask the
** compiler not to inline it.  It expands to the GCC-style attribute when
** __GNUC__ is defined, to the MSVC declspec when _MSC_VER is 1310 or
** later, and to nothing for every other compiler.
*/
#ifdef __GNUC__
# define CSV_NOINLINE  __attribute__((noinline))
#else
# if defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
# else
#  define CSV_NOINLINE
# endif
#endif
| 51 | |
| 52 | |
/* Maximum size, in bytes, of the error message held in a CsvReader */
#define CSV_MXERR 200

/* Size, in bytes, of the CsvReader input buffer used when reading a file */
#define CSV_INBUFSZ 1024

/* A context object used when reading CSV text.
**
** Input is consumed from zIn[iIn..nIn-1].  When "in" is not NULL, zIn[]
** is refilled from that stream each time it runs empty.
*/
typedef struct CsvReader CsvReader;
struct CsvReader {
  FILE *in;              /* Read the CSV text from this input stream */
  char *z;               /* Accumulated text for a field */
  int n;                 /* Number of bytes in z */
  int nAlloc;            /* Space allocated for z[] */
  int nLine;             /* Current line number */
  int cTerm;             /* Character that terminated the most recent field.
                         ** Declared int (not char) because it must be able
                         ** to hold EOF and compare equal to EOF even on
                         ** platforms where plain char is unsigned. */
  size_t iIn;            /* Next unread character in the input buffer */
  size_t nIn;            /* Number of characters in the input buffer */
  char *zIn;             /* The input buffer */
  char zErr[CSV_MXERR];  /* Error message */
};
| 73 | |
| 74 | /* Initialize a CsvReader object */ |
| 75 | static void csv_reader_init(CsvReader *p){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 76 | p->in = 0; |
| 77 | p->z = 0; |
| 78 | p->n = 0; |
| 79 | p->nAlloc = 0; |
| 80 | p->nLine = 0; |
| 81 | p->nIn = 0; |
| 82 | p->zIn = 0; |
| 83 | p->zErr[0] = 0; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 84 | } |
| 85 | |
| 86 | /* Close and reset a CsvReader object */ |
| 87 | static void csv_reader_reset(CsvReader *p){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 88 | if( p->in ){ |
| 89 | fclose(p->in); |
| 90 | sqlite3_free(p->zIn); |
| 91 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 92 | sqlite3_free(p->z); |
| 93 | csv_reader_init(p); |
| 94 | } |
| 95 | |
| 96 | /* Report an error on a CsvReader */ |
| 97 | static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ |
| 98 | va_list ap; |
| 99 | va_start(ap, zFormat); |
| 100 | sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); |
| 101 | va_end(ap); |
| 102 | } |
| 103 | |
| 104 | /* Open the file associated with a CsvReader |
| 105 | ** Return the number of errors. |
| 106 | */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 107 | static int csv_reader_open( |
| 108 | CsvReader *p, /* The reader to open */ |
| 109 | const char *zFilename, /* Read from this filename */ |
| 110 | const char *zData /* ... or use this data */ |
| 111 | ){ |
| 112 | if( zFilename ){ |
| 113 | p->zIn = sqlite3_malloc( CSV_INBUFSZ ); |
| 114 | if( p->zIn==0 ){ |
| 115 | csv_errmsg(p, "out of memory"); |
| 116 | return 1; |
| 117 | } |
| 118 | p->in = fopen(zFilename, "rb"); |
| 119 | if( p->in==0 ){ |
| 120 | csv_reader_reset(p); |
| 121 | csv_errmsg(p, "cannot open '%s' for reading", zFilename); |
| 122 | return 1; |
| 123 | } |
| 124 | }else{ |
| 125 | assert( p->in==0 ); |
| 126 | p->zIn = (char*)zData; |
| 127 | p->nIn = strlen(zData); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 128 | } |
| 129 | return 0; |
| 130 | } |
| 131 | |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 132 | /* The input buffer has overflowed. Refill the input buffer, then |
| 133 | ** return the next character |
| 134 | */ |
| 135 | static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ |
| 136 | size_t got; |
| 137 | |
| 138 | assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ |
| 139 | assert( p->in!=0 ); /* Only called if reading froma file */ |
| 140 | |
| 141 | got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); |
| 142 | if( got==0 ) return EOF; |
| 143 | p->nIn = got; |
| 144 | p->iIn = 1; |
| 145 | return p->zIn[0]; |
| 146 | } |
| 147 | |
| 148 | /* Return the next character of input. Return EOF at end of input. */ |
| 149 | static int csv_getc(CsvReader *p){ |
| 150 | if( p->iIn >= p->nIn ){ |
| 151 | if( p->in!=0 ) return csv_getc_refill(p); |
| 152 | return EOF; |
| 153 | } |
| 154 | return p->zIn[p->iIn++]; |
| 155 | } |
| 156 | |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 157 | /* Increase the size of p->z and append character c to the end. |
| 158 | ** Return 0 on success and non-zero if there is an OOM error */ |
| 159 | static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ |
| 160 | char *zNew; |
| 161 | int nNew = p->nAlloc*2 + 100; |
| 162 | zNew = sqlite3_realloc64(p->z, nNew); |
| 163 | if( zNew ){ |
| 164 | p->z = zNew; |
| 165 | p->nAlloc = nNew; |
| 166 | p->z[p->n++] = c; |
| 167 | return 0; |
| 168 | }else{ |
| 169 | csv_errmsg(p, "out of memory"); |
| 170 | return 1; |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | /* Append a single character to the CsvReader.z[] array. |
| 175 | ** Return 0 on success and non-zero if there is an OOM error */ |
| 176 | static int csv_append(CsvReader *p, char c){ |
| 177 | if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); |
| 178 | p->z[p->n++] = c; |
| 179 | return 0; |
| 180 | } |
| 181 | |
| 182 | /* Read a single field of CSV text. Compatible with rfc4180 and extended |
| 183 | ** with the option of having a separator other than ",". |
| 184 | ** |
| 185 | ** + Input comes from p->in. |
| 186 | ** + Store results in p->z of length p->n. Space to hold p->z comes |
| 187 | ** from sqlite3_malloc64(). |
| 188 | ** + Keep track of the line number in p->nLine. |
| 189 | ** + Store the character that terminates the field in p->cTerm. Store |
| 190 | ** EOF on end-of-file. |
| 191 | ** |
| 192 | ** Return "" at EOF. Return 0 on an OOM error. |
| 193 | */ |
| 194 | static char *csv_read_one_field(CsvReader *p){ |
| 195 | int c; |
| 196 | p->n = 0; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 197 | c = csv_getc(p); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 198 | if( c==EOF ){ |
| 199 | p->cTerm = EOF; |
| 200 | return ""; |
| 201 | } |
| 202 | if( c=='"' ){ |
| 203 | int pc, ppc; |
| 204 | int startLine = p->nLine; |
| 205 | int cQuote = c; |
| 206 | pc = ppc = 0; |
| 207 | while( 1 ){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 208 | c = csv_getc(p); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 209 | if( c=='\n' ) p->nLine++; |
| 210 | if( c==cQuote ){ |
| 211 | if( pc==cQuote ){ |
| 212 | pc = 0; |
| 213 | continue; |
| 214 | } |
| 215 | } |
| 216 | if( (c==',' && pc==cQuote) |
| 217 | || (c=='\n' && pc==cQuote) |
| 218 | || (c=='\n' && pc=='\r' && ppc==cQuote) |
| 219 | || (c==EOF && pc==cQuote) |
| 220 | ){ |
| 221 | do{ p->n--; }while( p->z[p->n]!=cQuote ); |
| 222 | p->cTerm = c; |
| 223 | break; |
| 224 | } |
| 225 | if( pc==cQuote && c!='\r' ){ |
| 226 | csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote); |
| 227 | break; |
| 228 | } |
| 229 | if( c==EOF ){ |
| 230 | csv_errmsg(p, "line %d: unterminated %c-quoted field\n", |
| 231 | startLine, cQuote); |
| 232 | p->cTerm = c; |
| 233 | break; |
| 234 | } |
| 235 | if( csv_append(p, (char)c) ) return 0; |
| 236 | ppc = pc; |
| 237 | pc = c; |
| 238 | } |
| 239 | }else{ |
| 240 | while( c!=EOF && c!=',' && c!='\n' ){ |
| 241 | if( csv_append(p, (char)c) ) return 0; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 242 | c = csv_getc(p); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 243 | } |
| 244 | if( c=='\n' ){ |
| 245 | p->nLine++; |
| 246 | if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; |
| 247 | } |
| 248 | p->cTerm = c; |
| 249 | } |
| 250 | if( p->z ) p->z[p->n] = 0; |
| 251 | return p->z; |
| 252 | } |
| 253 | |
| 254 | |
| 255 | /* Forward references to the various virtual table methods implemented |
| 256 | ** in this file. */ |
| 257 | static int csvtabCreate(sqlite3*, void*, int, const char*const*, |
| 258 | sqlite3_vtab**,char**); |
| 259 | static int csvtabConnect(sqlite3*, void*, int, const char*const*, |
| 260 | sqlite3_vtab**,char**); |
| 261 | static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); |
| 262 | static int csvtabDisconnect(sqlite3_vtab*); |
| 263 | static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); |
| 264 | static int csvtabClose(sqlite3_vtab_cursor*); |
| 265 | static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, |
| 266 | int argc, sqlite3_value **argv); |
| 267 | static int csvtabNext(sqlite3_vtab_cursor*); |
| 268 | static int csvtabEof(sqlite3_vtab_cursor*); |
| 269 | static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); |
| 270 | static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); |
| 271 | |
| 272 | /* An instance of the CSV virtual table */ |
| 273 | typedef struct CsvTable { |
| 274 | sqlite3_vtab base; /* Base class. Must be first */ |
| 275 | char *zFilename; /* Name of the CSV file */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 276 | char *zData; /* Raw CSV data in lieu of zFilename */ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 277 | long iStart; /* Offset to start of data in zFilename */ |
| 278 | int nCol; /* Number of columns in the CSV file */ |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 279 | unsigned int tstFlags; /* Bit values used for testing */ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 280 | } CsvTable; |
| 281 | |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 282 | /* Allowed values for tstFlags */ |
| 283 | #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ |
| 284 | |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 285 | /* A cursor for the CSV virtual table */ |
| 286 | typedef struct CsvCursor { |
| 287 | sqlite3_vtab_cursor base; /* Base class. Must be first */ |
| 288 | CsvReader rdr; /* The CsvReader object */ |
| 289 | char **azVal; /* Value of the current row */ |
| 290 | sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ |
| 291 | } CsvCursor; |
| 292 | |
| 293 | /* Transfer error message text from a reader into a CsvTable */ |
| 294 | static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ |
| 295 | sqlite3_free(pTab->base.zErrMsg); |
| 296 | pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); |
| 297 | } |
| 298 | |
| 299 | /* |
| 300 | ** This method is the destructor fo a CsvTable object. |
| 301 | */ |
| 302 | static int csvtabDisconnect(sqlite3_vtab *pVtab){ |
| 303 | CsvTable *p = (CsvTable*)pVtab; |
| 304 | sqlite3_free(p->zFilename); |
| 305 | sqlite3_free(p); |
| 306 | return SQLITE_OK; |
| 307 | } |
| 308 | |
/* Step over leading whitespace in z.  The return value points at the
** first character for which isspace() is false; for a string made up
** entirely of whitespace that is the zero terminator. */
static const char *csv_skip_whitespace(const char *z){
  size_t nSkip = 0;
  while( isspace((unsigned char)z[nSkip]) ){
    nSkip++;
  }
  return &z[nSkip];
}
| 315 | |
/* Remove trailing whitespace from the end of string z[].
**
** The string is shortened in place by writing a zero terminator after
** the last non-whitespace character.  A string consisting only of
** whitespace becomes the empty string.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  /* Examine z[n-1], the last character of the string.  z[n] is the zero
  ** terminator, which is never whitespace. */
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
| 322 | |
/* Dequote the string z[] in place.
**
** Nothing happens unless z begins with ' or " and ends with that same
** character.  Otherwise the enclosing quotes are removed and each
** doubled quote character inside is replaced by a single one.
*/
static void csv_dequote(char *z){
  size_t iRead;            /* Next character of z[] to examine */
  size_t iWrite = 0;       /* Where the next output character goes */
  size_t nByte;            /* Length of z[] including both quotes */
  const char q = z[0];

  if( q!='\'' && q!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=q ) return;
  iRead = 1;
  while( iRead<nByte-1 ){
    if( z[iRead]==q && z[iRead+1]==q ){
      iRead++;
    }
    z[iWrite++] = z[iRead];
    iRead++;
  }
  z[iWrite] = 0;
}
| 338 | |
/* Test whether z has the form "TAG = VALUE", where TAG is the first nTag
** bytes of zTag and whitespace is optional before TAG and on either side
** of the "=".  On a match, return a pointer to the first character of
** VALUE.  Return NULL when z does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur+1);
    }
  }
  return 0;
}
| 350 | |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 351 | /* Decode a parameter that requires a dequoted string. |
| 352 | ** |
| 353 | ** Return 1 if the parameter is seen, or 0 if not. 1 is returned |
| 354 | ** even if there is an error. If an error occurs, then an error message |
| 355 | ** is left in p->zErr. If there are no errors, p->zErr[0]==0. |
| 356 | */ |
| 357 | static int csv_string_parameter( |
| 358 | CsvReader *p, /* Leave the error message here, if there is one */ |
| 359 | const char *zParam, /* Parameter we are checking for */ |
| 360 | const char *zArg, /* Raw text of the virtual table argment */ |
| 361 | char **pzVal /* Write the dequoted string value here */ |
| 362 | ){ |
| 363 | const char *zValue; |
| 364 | zValue = csv_parameter(zParam,strlen(zParam),zArg); |
| 365 | if( zValue==0 ) return 0; |
| 366 | p->zErr[0] = 0; |
| 367 | if( *pzVal ){ |
| 368 | csv_errmsg(p, "more than one '%s' parameter", zParam); |
| 369 | return 1; |
| 370 | } |
| 371 | *pzVal = sqlite3_mprintf("%s", zValue); |
| 372 | if( *pzVal==0 ){ |
| 373 | csv_errmsg(p, "out of memory"); |
| 374 | return 1; |
| 375 | } |
| 376 | csv_trim_whitespace(*pzVal); |
| 377 | csv_dequote(*pzVal); |
| 378 | return 1; |
| 379 | } |
| 380 | |
| 381 | |
/* Interpret z as a boolean.
**
** Return 1 for "yes", "on", "true" (compared with sqlite3_stricmp) or
** the single character "1".  Return 0 for "no", "off", "false" or the
** single character "0".  Return -1 if z is none of these.
*/
static int csv_boolean(const char *z){
  if( sqlite3_stricmp("yes",z)==0
   || sqlite3_stricmp("on",z)==0
   || sqlite3_stricmp("true",z)==0
   || (z[0]=='1' && z[1]==0)    /* z[1], not z[0]: the string must end */
  ){
    return 1;
  }
  if( sqlite3_stricmp("no",z)==0
   || sqlite3_stricmp("off",z)==0
   || sqlite3_stricmp("false",z)==0
   || (z[0]=='0' && z[1]==0)
  ){
    return 0;
  }
  return -1;
}
| 402 | |
| 403 | |
| 404 | /* |
| 405 | ** Parameters: |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 406 | ** filename=FILENAME Name of file containing CSV content |
| 407 | ** data=TEXT Direct CSV content. |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 408 | ** schema=SCHEMA Alternative CSV schema. |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 409 | ** header=YES|NO First row of CSV defines the names of |
| 410 | ** columns if "yes". Default "no". |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 411 | ** columns=N Assume the CSV file contains N columns. |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 412 | ** testflags=N Bitmask of test flags. Optional |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 413 | ** |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 414 | ** If schema= is omitted, then the columns are named "c0", "c1", "c2", |
| 415 | ** and so forth. If columns=N is omitted, then the file is opened and |
| 416 | ** the number of columns in the first row is counted to determine the |
| 417 | ** column count. If header=YES, then the first row is skipped. |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 418 | */ |
| 419 | static int csvtabConnect( |
| 420 | sqlite3 *db, |
| 421 | void *pAux, |
| 422 | int argc, const char *const*argv, |
| 423 | sqlite3_vtab **ppVtab, |
| 424 | char **pzErr |
| 425 | ){ |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 426 | CsvTable *pNew = 0; /* The CsvTable object to construct */ |
| 427 | int bHeader = -1; /* header= flags. -1 means not seen yet */ |
| 428 | int rc = SQLITE_OK; /* Result code from this routine */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 429 | int i, j; /* Loop counters */ |
| 430 | int tstFlags = 0; /* Value for testflags=N parameter */ |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 431 | int nCol = -99; /* Value of the columns= parameter */ |
| 432 | CsvReader sRdr; /* A CSV file reader used to store an error |
| 433 | ** message and/or to count the number of columns */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 434 | static const char *azParam[] = { |
| 435 | "filename", "data", "schema", |
| 436 | }; |
| 437 | char *azPValue[3]; /* Parameter values */ |
| 438 | # define CSV_FILENAME (azPValue[0]) |
| 439 | # define CSV_DATA (azPValue[1]) |
| 440 | # define CSV_SCHEMA (azPValue[2]) |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 441 | |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 442 | |
| 443 | assert( sizeof(azPValue)==sizeof(azParam) ); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 444 | memset(&sRdr, 0, sizeof(sRdr)); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 445 | memset(azPValue, 0, sizeof(azPValue)); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 446 | for(i=3; i<argc; i++){ |
| 447 | const char *z = argv[i]; |
| 448 | const char *zValue; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 449 | for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ |
| 450 | if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; |
| 451 | } |
| 452 | if( j<sizeof(azParam)/sizeof(azParam[0]) ){ |
| 453 | if( sRdr.zErr[0] ) goto csvtab_connect_error; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 454 | }else |
| 455 | if( (zValue = csv_parameter("header",6,z))!=0 ){ |
| 456 | int x; |
| 457 | if( bHeader>=0 ){ |
| 458 | csv_errmsg(&sRdr, "more than one 'header' parameter"); |
| 459 | goto csvtab_connect_error; |
| 460 | } |
| 461 | x = csv_boolean(zValue); |
| 462 | if( x==1 ){ |
| 463 | bHeader = 1; |
| 464 | }else if( x==0 ){ |
| 465 | bHeader = 0; |
| 466 | }else{ |
| 467 | csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); |
| 468 | goto csvtab_connect_error; |
| 469 | } |
| 470 | }else |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 471 | if( (zValue = csv_parameter("testflags",9,z))!=0 ){ |
| 472 | tstFlags = (unsigned int)atoi(zValue); |
| 473 | }else |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 474 | if( (zValue = csv_parameter("columns",7,z))!=0 ){ |
| 475 | if( nCol>0 ){ |
| 476 | csv_errmsg(&sRdr, "more than one 'columns' parameter"); |
| 477 | goto csvtab_connect_error; |
| 478 | } |
| 479 | nCol = atoi(zValue); |
| 480 | if( nCol<=0 ){ |
| 481 | csv_errmsg(&sRdr, "must have at least one column"); |
| 482 | goto csvtab_connect_error; |
| 483 | } |
| 484 | }else |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 485 | { |
| 486 | csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); |
| 487 | goto csvtab_connect_error; |
| 488 | } |
| 489 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 490 | if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ |
| 491 | csv_errmsg(&sRdr, "must either filename= or data= but not both"); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 492 | goto csvtab_connect_error; |
| 493 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 494 | if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 495 | goto csvtab_connect_error; |
| 496 | } |
| 497 | pNew = sqlite3_malloc( sizeof(*pNew) ); |
| 498 | *ppVtab = (sqlite3_vtab*)pNew; |
| 499 | if( pNew==0 ) goto csvtab_connect_oom; |
| 500 | memset(pNew, 0, sizeof(*pNew)); |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 501 | if( nCol>0 ){ |
| 502 | pNew->nCol = nCol; |
| 503 | }else{ |
| 504 | do{ |
| 505 | const char *z = csv_read_one_field(&sRdr); |
| 506 | if( z==0 ) goto csvtab_connect_oom; |
| 507 | pNew->nCol++; |
| 508 | }while( sRdr.cTerm==',' ); |
| 509 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 510 | pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; |
| 511 | pNew->zData = CSV_DATA; CSV_DATA = 0; |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 512 | pNew->tstFlags = tstFlags; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 513 | pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; |
| 514 | csv_reader_reset(&sRdr); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 515 | if( CSV_SCHEMA==0 ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 516 | char *zSep = ""; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 517 | CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); |
| 518 | if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 519 | for(i=0; i<pNew->nCol; i++){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 520 | CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 521 | zSep = ","; |
| 522 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 523 | CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 524 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 525 | rc = sqlite3_declare_vtab(db, CSV_SCHEMA); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 526 | if( rc ) goto csvtab_connect_error; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 527 | for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ |
| 528 | sqlite3_free(azPValue[i]); |
| 529 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 530 | return SQLITE_OK; |
| 531 | |
| 532 | csvtab_connect_oom: |
| 533 | rc = SQLITE_NOMEM; |
| 534 | csv_errmsg(&sRdr, "out of memory"); |
| 535 | |
| 536 | csvtab_connect_error: |
| 537 | if( pNew ) csvtabDisconnect(&pNew->base); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 538 | for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ |
| 539 | sqlite3_free(azPValue[i]); |
| 540 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 541 | if( sRdr.zErr[0] ){ |
| 542 | sqlite3_free(*pzErr); |
| 543 | *pzErr = sqlite3_mprintf("%s", sRdr.zErr); |
| 544 | } |
| 545 | csv_reader_reset(&sRdr); |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 546 | if( rc==SQLITE_OK ) rc = SQLITE_ERROR; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 547 | return rc; |
| 548 | } |
| 549 | |
| 550 | /* |
| 551 | ** Reset the current row content held by a CsvCursor. |
| 552 | */ |
| 553 | static void csvtabCursorRowReset(CsvCursor *pCur){ |
| 554 | CsvTable *pTab = (CsvTable*)pCur->base.pVtab; |
| 555 | int i; |
| 556 | for(i=0; i<pTab->nCol; i++){ |
| 557 | sqlite3_free(pCur->azVal[i]); |
| 558 | pCur->azVal[i] = 0; |
| 559 | } |
| 560 | } |
| 561 | |
| 562 | /* |
| 563 | ** The xConnect and xCreate methods do the same thing, but they must be |
| 564 | ** different so that the virtual table is not an eponymous virtual table. |
| 565 | */ |
| 566 | static int csvtabCreate( |
| 567 | sqlite3 *db, |
| 568 | void *pAux, |
| 569 | int argc, const char *const*argv, |
| 570 | sqlite3_vtab **ppVtab, |
| 571 | char **pzErr |
| 572 | ){ |
| 573 | return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); |
| 574 | } |
| 575 | |
| 576 | /* |
| 577 | ** Destructor for a CsvCursor. |
| 578 | */ |
| 579 | static int csvtabClose(sqlite3_vtab_cursor *cur){ |
| 580 | CsvCursor *pCur = (CsvCursor*)cur; |
| 581 | csvtabCursorRowReset(pCur); |
| 582 | csv_reader_reset(&pCur->rdr); |
| 583 | sqlite3_free(cur); |
| 584 | return SQLITE_OK; |
| 585 | } |
| 586 | |
| 587 | /* |
| 588 | ** Constructor for a new CsvTable cursor object. |
| 589 | */ |
| 590 | static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ |
| 591 | CsvTable *pTab = (CsvTable*)p; |
| 592 | CsvCursor *pCur; |
| 593 | pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol ); |
| 594 | if( pCur==0 ) return SQLITE_NOMEM; |
| 595 | memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol ); |
| 596 | pCur->azVal = (char**)&pCur[1]; |
| 597 | *ppCursor = &pCur->base; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 598 | if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 599 | csv_xfer_error(pTab, &pCur->rdr); |
| 600 | return SQLITE_ERROR; |
| 601 | } |
| 602 | return SQLITE_OK; |
| 603 | } |
| 604 | |
| 605 | |
| 606 | /* |
| 607 | ** Advance a CsvCursor to its next row of input. |
| 608 | ** Set the EOF marker if we reach the end of input. |
| 609 | */ |
| 610 | static int csvtabNext(sqlite3_vtab_cursor *cur){ |
| 611 | CsvCursor *pCur = (CsvCursor*)cur; |
| 612 | CsvTable *pTab = (CsvTable*)cur->pVtab; |
| 613 | int i = 0; |
| 614 | char *z; |
| 615 | csvtabCursorRowReset(pCur); |
| 616 | do{ |
| 617 | z = csv_read_one_field(&pCur->rdr); |
| 618 | if( z==0 ){ |
| 619 | csv_xfer_error(pTab, &pCur->rdr); |
| 620 | break; |
| 621 | } |
| 622 | z = sqlite3_mprintf("%s", z); |
| 623 | if( z==0 ){ |
| 624 | csv_errmsg(&pCur->rdr, "out of memory"); |
| 625 | csv_xfer_error(pTab, &pCur->rdr); |
| 626 | break; |
| 627 | } |
| 628 | if( i<pTab->nCol ){ |
| 629 | pCur->azVal[i++] = z; |
| 630 | } |
| 631 | }while( z!=0 && pCur->rdr.cTerm==',' ); |
| 632 | if( z==0 || pCur->rdr.cTerm==EOF ){ |
| 633 | pCur->iRowid = -1; |
| 634 | }else{ |
| 635 | pCur->iRowid++; |
| 636 | } |
| 637 | return SQLITE_OK; |
| 638 | } |
| 639 | |
| 640 | /* |
| 641 | ** Return values of columns for the row at which the CsvCursor |
| 642 | ** is currently pointing. |
| 643 | */ |
| 644 | static int csvtabColumn( |
| 645 | sqlite3_vtab_cursor *cur, /* The cursor */ |
| 646 | sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ |
| 647 | int i /* Which column to return */ |
| 648 | ){ |
| 649 | CsvCursor *pCur = (CsvCursor*)cur; |
| 650 | CsvTable *pTab = (CsvTable*)cur->pVtab; |
| 651 | if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ |
| 652 | sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); |
| 653 | } |
| 654 | return SQLITE_OK; |
| 655 | } |
| 656 | |
| 657 | /* |
| 658 | ** Return the rowid for the current row. |
| 659 | */ |
| 660 | static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ |
| 661 | CsvCursor *pCur = (CsvCursor*)cur; |
| 662 | *pRowid = pCur->iRowid; |
| 663 | return SQLITE_OK; |
| 664 | } |
| 665 | |
| 666 | /* |
| 667 | ** Return TRUE if the cursor has been moved off of the last |
| 668 | ** row of output. |
| 669 | */ |
| 670 | static int csvtabEof(sqlite3_vtab_cursor *cur){ |
| 671 | CsvCursor *pCur = (CsvCursor*)cur; |
| 672 | return pCur->iRowid<0; |
| 673 | } |
| 674 | |
| 675 | /* |
| 676 | ** Only a full table scan is supported. So xFilter simply rewinds to |
| 677 | ** the beginning. |
| 678 | */ |
| 679 | static int csvtabFilter( |
| 680 | sqlite3_vtab_cursor *pVtabCursor, |
| 681 | int idxNum, const char *idxStr, |
| 682 | int argc, sqlite3_value **argv |
| 683 | ){ |
| 684 | CsvCursor *pCur = (CsvCursor*)pVtabCursor; |
| 685 | CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; |
| 686 | pCur->iRowid = 0; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 687 | if( pCur->rdr.in==0 ){ |
| 688 | assert( pCur->rdr.zIn==pTab->zData ); |
| 689 | assert( pTab->iStart<=pCur->rdr.nIn ); |
| 690 | pCur->rdr.iIn = pTab->iStart; |
| 691 | }else{ |
| 692 | fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); |
| 693 | pCur->rdr.iIn = 0; |
| 694 | pCur->rdr.nIn = 0; |
| 695 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 696 | return csvtabNext(pVtabCursor); |
| 697 | } |
| 698 | |
| 699 | /* |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 700 | ** Only a forward full table scan is supported. xBestIndex is mostly |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 701 | ** a no-op. If CSVTEST_FIDX is set, then the presence of equality |
| 702 | ** constraints lowers the estimated cost, which is fiction, but is useful |
| 703 | ** for testing certain kinds of virtual table behavior. |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 704 | */ |
| 705 | static int csvtabBestIndex( |
| 706 | sqlite3_vtab *tab, |
| 707 | sqlite3_index_info *pIdxInfo |
| 708 | ){ |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 709 | CsvTable *pTab = (CsvTable*)tab; |
| 710 | int i; |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 711 | int nConst = 0; |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 712 | pIdxInfo->estimatedCost = 1000000; |
| 713 | if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){ |
| 714 | return SQLITE_OK; |
| 715 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 716 | /* The usual (and sensible) case is to take the "return SQLITE_OK" above. |
| 717 | ** The code below only runs when testflags=1. The code below |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 718 | ** generates an artifical and unrealistic plan which is useful |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame^] | 719 | ** for testing virtual table logic but is not helpfulto real applications. |
| 720 | ** |
| 721 | ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual |
| 722 | ** table (even though it is not) and the cost of running the virtual table |
| 723 | ** is reduced from 1 million to just 10. The constraints are *not* marked |
| 724 | ** as omittable, however, so the query planner should still generate a |
| 725 | ** plan that gives a correct answer, even if they plan is not optimal. |
| 726 | */ |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 727 | for(i=0; i<pIdxInfo->nConstraint; i++){ |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 728 | unsigned char op; |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 729 | if( pIdxInfo->aConstraint[i].usable==0 ) continue; |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 730 | op = pIdxInfo->aConstraint[i].op; |
| 731 | if( op==SQLITE_INDEX_CONSTRAINT_EQ |
| 732 | || op==SQLITE_INDEX_CONSTRAINT_LIKE |
| 733 | || op==SQLITE_INDEX_CONSTRAINT_GLOB |
| 734 | ){ |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 735 | pIdxInfo->estimatedCost = 10; |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 736 | pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; |
| 737 | nConst++; |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 738 | } |
| 739 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 740 | return SQLITE_OK; |
| 741 | } |
| 742 | |
| 743 | |
| 744 | static sqlite3_module CsvModule = { |
| 745 | 0, /* iVersion */ |
| 746 | csvtabCreate, /* xCreate */ |
| 747 | csvtabConnect, /* xConnect */ |
| 748 | csvtabBestIndex, /* xBestIndex */ |
| 749 | csvtabDisconnect, /* xDisconnect */ |
| 750 | csvtabDisconnect, /* xDestroy */ |
| 751 | csvtabOpen, /* xOpen - open a cursor */ |
| 752 | csvtabClose, /* xClose - close a cursor */ |
| 753 | csvtabFilter, /* xFilter - configure scan constraints */ |
| 754 | csvtabNext, /* xNext - advance a cursor */ |
| 755 | csvtabEof, /* xEof - check for end of scan */ |
| 756 | csvtabColumn, /* xColumn - read data */ |
| 757 | csvtabRowid, /* xRowid - read data */ |
| 758 | 0, /* xUpdate */ |
| 759 | 0, /* xBegin */ |
| 760 | 0, /* xSync */ |
| 761 | 0, /* xCommit */ |
| 762 | 0, /* xRollback */ |
| 763 | 0, /* xFindMethod */ |
| 764 | 0, /* xRename */ |
| 765 | }; |
| 766 | |
| 767 | #ifdef _WIN32 |
| 768 | __declspec(dllexport) |
| 769 | #endif |
| 770 | /* |
| 771 | ** This routine is called when the extension is loaded. The new |
| 772 | ** CSV virtual table module is registered with the calling database |
| 773 | ** connection. |
| 774 | */ |
| 775 | int sqlite3_csv_init( |
| 776 | sqlite3 *db, |
| 777 | char **pzErrMsg, |
| 778 | const sqlite3_api_routines *pApi |
| 779 | ){ |
| 780 | SQLITE_EXTENSION_INIT2(pApi); |
| 781 | return sqlite3_create_module(db, "csv", &CsvModule, 0); |
| 782 | } |