/*
** 2016-05-28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains the implementation of an SQLite virtual table for
** reading CSV files.
**
** Usage:
**
**    .load ./csv
**    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
**    SELECT * FROM csv;
**
** The columns are named "c0", "c1", "c2", ... by default.  But the
** application can define its own CREATE TABLE statement as an additional
** parameter.  For example:
**
**    CREATE VIRTUAL TABLE temp.csv2 USING csv(
**       filename = "../http.log",
**       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
**    );
**
** Instead of specifying a file, the text of the CSV can be loaded using
** the data= parameter.
**
** If the columns=N parameter is supplied, then the CSV file is assumed to have
** N columns.  If the columns parameter is omitted, the CSV file is opened
** as soon as the virtual table is constructed and the first row of the CSV
** is read in order to count the columns.
**
** Some extra debugging features (used for testing virtual tables) are available
** if this module is compiled with -DSQLITE_TEST.
*/
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <ctype.h>
#include <stdio.h>
#include <limits.h>

#ifndef SQLITE_OMIT_VIRTUALTABLE

/*
** A macro to hint to the compiler that a function should not be
** inlined.  Expands to nothing on compilers with no known spelling.
*/
#if defined(__GNUC__)
#  define CSV_NOINLINE  __attribute__((noinline))
#elif defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
#else
#  define CSV_NOINLINE
#endif


/* Max size of the error message in a CsvReader */
#define CSV_MXERR 200

/* Size of the CsvReader input buffer */
#define CSV_INBUFSZ 1024

drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 72 | /* A context object used when read a CSV file. */ |
| 73 | typedef struct CsvReader CsvReader; |
| 74 | struct CsvReader { |
| 75 | FILE *in; /* Read the CSV text from this input stream */ |
| 76 | char *z; /* Accumulated text for a field */ |
| 77 | int n; /* Number of bytes in z */ |
| 78 | int nAlloc; /* Space allocated for z[] */ |
| 79 | int nLine; /* Current line number */ |
drh | d5fbde8 | 2017-06-26 18:42:23 +0000 | [diff] [blame] | 80 | int bNotFirst; /* True if prior text has been seen */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 81 | char cTerm; /* Character that terminated the most recent field */ |
| 82 | size_t iIn; /* Next unread character in the input buffer */ |
| 83 | size_t nIn; /* Number of characters in the input buffer */ |
| 84 | char *zIn; /* The input buffer */ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 85 | char zErr[CSV_MXERR]; /* Error message */ |
| 86 | }; |
| 87 | |
| 88 | /* Initialize a CsvReader object */ |
| 89 | static void csv_reader_init(CsvReader *p){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 90 | p->in = 0; |
| 91 | p->z = 0; |
| 92 | p->n = 0; |
| 93 | p->nAlloc = 0; |
| 94 | p->nLine = 0; |
drh | d5fbde8 | 2017-06-26 18:42:23 +0000 | [diff] [blame] | 95 | p->bNotFirst = 0; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 96 | p->nIn = 0; |
| 97 | p->zIn = 0; |
| 98 | p->zErr[0] = 0; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | /* Close and reset a CsvReader object */ |
| 102 | static void csv_reader_reset(CsvReader *p){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 103 | if( p->in ){ |
| 104 | fclose(p->in); |
| 105 | sqlite3_free(p->zIn); |
| 106 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 107 | sqlite3_free(p->z); |
| 108 | csv_reader_init(p); |
| 109 | } |
| 110 | |
| 111 | /* Report an error on a CsvReader */ |
| 112 | static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ |
| 113 | va_list ap; |
| 114 | va_start(ap, zFormat); |
| 115 | sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); |
| 116 | va_end(ap); |
| 117 | } |
| 118 | |
| 119 | /* Open the file associated with a CsvReader |
| 120 | ** Return the number of errors. |
| 121 | */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 122 | static int csv_reader_open( |
| 123 | CsvReader *p, /* The reader to open */ |
| 124 | const char *zFilename, /* Read from this filename */ |
| 125 | const char *zData /* ... or use this data */ |
| 126 | ){ |
| 127 | if( zFilename ){ |
| 128 | p->zIn = sqlite3_malloc( CSV_INBUFSZ ); |
| 129 | if( p->zIn==0 ){ |
| 130 | csv_errmsg(p, "out of memory"); |
| 131 | return 1; |
| 132 | } |
| 133 | p->in = fopen(zFilename, "rb"); |
| 134 | if( p->in==0 ){ |
| 135 | csv_reader_reset(p); |
| 136 | csv_errmsg(p, "cannot open '%s' for reading", zFilename); |
| 137 | return 1; |
| 138 | } |
| 139 | }else{ |
| 140 | assert( p->in==0 ); |
| 141 | p->zIn = (char*)zData; |
| 142 | p->nIn = strlen(zData); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 143 | } |
| 144 | return 0; |
| 145 | } |
| 146 | |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 147 | /* The input buffer has overflowed. Refill the input buffer, then |
| 148 | ** return the next character |
| 149 | */ |
| 150 | static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ |
| 151 | size_t got; |
| 152 | |
| 153 | assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ |
| 154 | assert( p->in!=0 ); /* Only called if reading froma file */ |
| 155 | |
| 156 | got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); |
| 157 | if( got==0 ) return EOF; |
| 158 | p->nIn = got; |
| 159 | p->iIn = 1; |
| 160 | return p->zIn[0]; |
| 161 | } |
| 162 | |
| 163 | /* Return the next character of input. Return EOF at end of input. */ |
| 164 | static int csv_getc(CsvReader *p){ |
| 165 | if( p->iIn >= p->nIn ){ |
| 166 | if( p->in!=0 ) return csv_getc_refill(p); |
| 167 | return EOF; |
| 168 | } |
| 169 | return p->zIn[p->iIn++]; |
| 170 | } |
| 171 | |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 172 | /* Increase the size of p->z and append character c to the end. |
| 173 | ** Return 0 on success and non-zero if there is an OOM error */ |
| 174 | static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ |
| 175 | char *zNew; |
| 176 | int nNew = p->nAlloc*2 + 100; |
| 177 | zNew = sqlite3_realloc64(p->z, nNew); |
| 178 | if( zNew ){ |
| 179 | p->z = zNew; |
| 180 | p->nAlloc = nNew; |
| 181 | p->z[p->n++] = c; |
| 182 | return 0; |
| 183 | }else{ |
| 184 | csv_errmsg(p, "out of memory"); |
| 185 | return 1; |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | /* Append a single character to the CsvReader.z[] array. |
| 190 | ** Return 0 on success and non-zero if there is an OOM error */ |
| 191 | static int csv_append(CsvReader *p, char c){ |
| 192 | if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); |
| 193 | p->z[p->n++] = c; |
| 194 | return 0; |
| 195 | } |
| 196 | |
| 197 | /* Read a single field of CSV text. Compatible with rfc4180 and extended |
| 198 | ** with the option of having a separator other than ",". |
| 199 | ** |
| 200 | ** + Input comes from p->in. |
| 201 | ** + Store results in p->z of length p->n. Space to hold p->z comes |
| 202 | ** from sqlite3_malloc64(). |
| 203 | ** + Keep track of the line number in p->nLine. |
| 204 | ** + Store the character that terminates the field in p->cTerm. Store |
| 205 | ** EOF on end-of-file. |
| 206 | ** |
| 207 | ** Return "" at EOF. Return 0 on an OOM error. |
| 208 | */ |
| 209 | static char *csv_read_one_field(CsvReader *p){ |
| 210 | int c; |
| 211 | p->n = 0; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 212 | c = csv_getc(p); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 213 | if( c==EOF ){ |
| 214 | p->cTerm = EOF; |
| 215 | return ""; |
| 216 | } |
| 217 | if( c=='"' ){ |
| 218 | int pc, ppc; |
| 219 | int startLine = p->nLine; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 220 | pc = ppc = 0; |
| 221 | while( 1 ){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 222 | c = csv_getc(p); |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 223 | if( c<='"' || pc=='"' ){ |
| 224 | if( c=='\n' ) p->nLine++; |
| 225 | if( c=='"' ){ |
| 226 | if( pc=='"' ){ |
| 227 | pc = 0; |
| 228 | continue; |
| 229 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 230 | } |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 231 | if( (c==',' && pc=='"') |
| 232 | || (c=='\n' && pc=='"') |
| 233 | || (c=='\n' && pc=='\r' && ppc=='"') |
| 234 | || (c==EOF && pc=='"') |
| 235 | ){ |
| 236 | do{ p->n--; }while( p->z[p->n]!='"' ); |
mistachkin | 80f2b33 | 2016-07-22 21:26:56 +0000 | [diff] [blame] | 237 | p->cTerm = (char)c; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 238 | break; |
| 239 | } |
| 240 | if( pc=='"' && c!='\r' ){ |
| 241 | csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); |
| 242 | break; |
| 243 | } |
| 244 | if( c==EOF ){ |
| 245 | csv_errmsg(p, "line %d: unterminated %c-quoted field\n", |
| 246 | startLine, '"'); |
mistachkin | 80f2b33 | 2016-07-22 21:26:56 +0000 | [diff] [blame] | 247 | p->cTerm = (char)c; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 248 | break; |
| 249 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 250 | } |
| 251 | if( csv_append(p, (char)c) ) return 0; |
| 252 | ppc = pc; |
| 253 | pc = c; |
| 254 | } |
| 255 | }else{ |
drh | d5fbde8 | 2017-06-26 18:42:23 +0000 | [diff] [blame] | 256 | /* If this is the first field being parsed and it begins with the |
| 257 | ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ |
| 258 | if( (c&0xff)==0xef && p->bNotFirst==0 ){ |
drh | 2fb960b | 2017-06-28 15:17:31 +0000 | [diff] [blame] | 259 | csv_append(p, (char)c); |
drh | d5fbde8 | 2017-06-26 18:42:23 +0000 | [diff] [blame] | 260 | c = csv_getc(p); |
| 261 | if( (c&0xff)==0xbb ){ |
drh | 2fb960b | 2017-06-28 15:17:31 +0000 | [diff] [blame] | 262 | csv_append(p, (char)c); |
drh | d5fbde8 | 2017-06-26 18:42:23 +0000 | [diff] [blame] | 263 | c = csv_getc(p); |
| 264 | if( (c&0xff)==0xbf ){ |
| 265 | p->bNotFirst = 1; |
| 266 | p->n = 0; |
| 267 | return csv_read_one_field(p); |
| 268 | } |
| 269 | } |
| 270 | } |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 271 | while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 272 | if( csv_append(p, (char)c) ) return 0; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 273 | c = csv_getc(p); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 274 | } |
| 275 | if( c=='\n' ){ |
| 276 | p->nLine++; |
| 277 | if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; |
| 278 | } |
mistachkin | 80f2b33 | 2016-07-22 21:26:56 +0000 | [diff] [blame] | 279 | p->cTerm = (char)c; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 280 | } |
| 281 | if( p->z ) p->z[p->n] = 0; |
drh | d5fbde8 | 2017-06-26 18:42:23 +0000 | [diff] [blame] | 282 | p->bNotFirst = 1; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 283 | return p->z; |
| 284 | } |
| 285 | |
| 286 | |
| 287 | /* Forward references to the various virtual table methods implemented |
| 288 | ** in this file. */ |
| 289 | static int csvtabCreate(sqlite3*, void*, int, const char*const*, |
| 290 | sqlite3_vtab**,char**); |
| 291 | static int csvtabConnect(sqlite3*, void*, int, const char*const*, |
| 292 | sqlite3_vtab**,char**); |
| 293 | static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); |
| 294 | static int csvtabDisconnect(sqlite3_vtab*); |
| 295 | static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); |
| 296 | static int csvtabClose(sqlite3_vtab_cursor*); |
| 297 | static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, |
| 298 | int argc, sqlite3_value **argv); |
| 299 | static int csvtabNext(sqlite3_vtab_cursor*); |
| 300 | static int csvtabEof(sqlite3_vtab_cursor*); |
| 301 | static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); |
| 302 | static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); |
| 303 | |
| 304 | /* An instance of the CSV virtual table */ |
| 305 | typedef struct CsvTable { |
| 306 | sqlite3_vtab base; /* Base class. Must be first */ |
| 307 | char *zFilename; /* Name of the CSV file */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 308 | char *zData; /* Raw CSV data in lieu of zFilename */ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 309 | long iStart; /* Offset to start of data in zFilename */ |
| 310 | int nCol; /* Number of columns in the CSV file */ |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 311 | unsigned int tstFlags; /* Bit values used for testing */ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 312 | } CsvTable; |
| 313 | |
/* Allowed values for tstFlags */
#define CSVTEST_FIDX  0x0001      /* Pretend that constrained searches cost less */

drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 317 | /* A cursor for the CSV virtual table */ |
| 318 | typedef struct CsvCursor { |
| 319 | sqlite3_vtab_cursor base; /* Base class. Must be first */ |
| 320 | CsvReader rdr; /* The CsvReader object */ |
| 321 | char **azVal; /* Value of the current row */ |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 322 | int *aLen; /* Length of each entry */ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 323 | sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ |
| 324 | } CsvCursor; |
| 325 | |
| 326 | /* Transfer error message text from a reader into a CsvTable */ |
| 327 | static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ |
| 328 | sqlite3_free(pTab->base.zErrMsg); |
| 329 | pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); |
| 330 | } |
| 331 | |
| 332 | /* |
| 333 | ** This method is the destructor fo a CsvTable object. |
| 334 | */ |
| 335 | static int csvtabDisconnect(sqlite3_vtab *pVtab){ |
| 336 | CsvTable *p = (CsvTable*)pVtab; |
| 337 | sqlite3_free(p->zFilename); |
drh | 35db31b | 2016-06-02 23:13:21 +0000 | [diff] [blame] | 338 | sqlite3_free(p->zData); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 339 | sqlite3_free(p); |
| 340 | return SQLITE_OK; |
| 341 | } |
| 342 | |
/* Skip leading whitespace.  Return a pointer to the first non-whitespace
** character, or to the zero terminator if the string has only whitespace */
static const char *csv_skip_whitespace(const char *z){
  /* Cast to unsigned char: isspace() is undefined for negative values */
  while( isspace((unsigned char)z[0]) ) z++;
  return z;
}

/* Remove trailing whitespace from the end of string z[].
**
** The string is modified in place by writing a zero terminator after
** the last non-whitespace character.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  /* Examine z[n-1], the last character.  z[n] is the terminator, which
  ** is never whitespace, so testing it would never trim anything. */
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}

/* Dequote the string in place.
**
** If z[] begins and ends with the same quote character (either ' or "),
** the enclosing quotes are removed and each doubled quote character in
** the interior is collapsed to a single one.  Otherwise z[] is left
** unchanged.
*/
static void csv_dequote(char *z){
  char cQuote = z[0];
  size_t i, j, n;

  if( cQuote!='\'' && cQuote!='"' ) return;
  n = strlen(z);
  if( n<2 || z[n-1]!=z[0] ) return;
  for(i=1, j=0; i<n-1; i++){
    if( z[i]==cQuote && z[i+1]==cQuote ) i++;
    z[j++] = z[i];
  }
  z[j] = 0;
}

/* Check to see if the string is of the form:  "TAG = VALUE" with optional
** whitespace before and around tokens.  If it is, return a pointer to the
** first character of VALUE.  If it is not, return NULL.
**
** zTag is the tag to look for and nTag is its length in bytes.  The
** comparison against zTag is case-sensitive.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  z = csv_skip_whitespace(z);
  if( strncmp(zTag, z, nTag)!=0 ) return 0;
  z = csv_skip_whitespace(z+nTag);
  if( z[0]!='=' ) return 0;
  return csv_skip_whitespace(z+1);
}

drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 385 | /* Decode a parameter that requires a dequoted string. |
| 386 | ** |
| 387 | ** Return 1 if the parameter is seen, or 0 if not. 1 is returned |
| 388 | ** even if there is an error. If an error occurs, then an error message |
| 389 | ** is left in p->zErr. If there are no errors, p->zErr[0]==0. |
| 390 | */ |
| 391 | static int csv_string_parameter( |
| 392 | CsvReader *p, /* Leave the error message here, if there is one */ |
| 393 | const char *zParam, /* Parameter we are checking for */ |
| 394 | const char *zArg, /* Raw text of the virtual table argment */ |
| 395 | char **pzVal /* Write the dequoted string value here */ |
| 396 | ){ |
| 397 | const char *zValue; |
drh | 11499f0 | 2016-07-09 16:38:25 +0000 | [diff] [blame] | 398 | zValue = csv_parameter(zParam,(int)strlen(zParam),zArg); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 399 | if( zValue==0 ) return 0; |
| 400 | p->zErr[0] = 0; |
| 401 | if( *pzVal ){ |
| 402 | csv_errmsg(p, "more than one '%s' parameter", zParam); |
| 403 | return 1; |
| 404 | } |
| 405 | *pzVal = sqlite3_mprintf("%s", zValue); |
| 406 | if( *pzVal==0 ){ |
| 407 | csv_errmsg(p, "out of memory"); |
| 408 | return 1; |
| 409 | } |
| 410 | csv_trim_whitespace(*pzVal); |
| 411 | csv_dequote(*pzVal); |
| 412 | return 1; |
| 413 | } |
| 414 | |
| 415 | |
/* Return 0 if the argument is false and 1 if it is true.  Return -1 if
** we cannot really tell.
**
** "yes", "on", "true" and "1" are true; "no", "off", "false" and "0" are
** false.  The words are compared without regard to case.
*/
static int csv_boolean(const char *z){
  if( sqlite3_stricmp("yes",z)==0
   || sqlite3_stricmp("on",z)==0
   || sqlite3_stricmp("true",z)==0
   || (z[0]=='1' && z[1]==0)
  ){
    return 1;
  }
  if( sqlite3_stricmp("no",z)==0
   || sqlite3_stricmp("off",z)==0
   || sqlite3_stricmp("false",z)==0
   || (z[0]=='0' && z[1]==0)
  ){
    return 0;
  }
  return -1;
}


| 438 | /* |
| 439 | ** Parameters: |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 440 | ** filename=FILENAME Name of file containing CSV content |
| 441 | ** data=TEXT Direct CSV content. |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 442 | ** schema=SCHEMA Alternative CSV schema. |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 443 | ** header=YES|NO First row of CSV defines the names of |
| 444 | ** columns if "yes". Default "no". |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 445 | ** columns=N Assume the CSV file contains N columns. |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 446 | ** |
| 447 | ** Only available if compiled with SQLITE_TEST: |
| 448 | ** |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 449 | ** testflags=N Bitmask of test flags. Optional |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 450 | ** |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 451 | ** If schema= is omitted, then the columns are named "c0", "c1", "c2", |
| 452 | ** and so forth. If columns=N is omitted, then the file is opened and |
| 453 | ** the number of columns in the first row is counted to determine the |
| 454 | ** column count. If header=YES, then the first row is skipped. |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 455 | */ |
| 456 | static int csvtabConnect( |
| 457 | sqlite3 *db, |
| 458 | void *pAux, |
| 459 | int argc, const char *const*argv, |
| 460 | sqlite3_vtab **ppVtab, |
| 461 | char **pzErr |
| 462 | ){ |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 463 | CsvTable *pNew = 0; /* The CsvTable object to construct */ |
| 464 | int bHeader = -1; /* header= flags. -1 means not seen yet */ |
| 465 | int rc = SQLITE_OK; /* Result code from this routine */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 466 | int i, j; /* Loop counters */ |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 467 | #ifdef SQLITE_TEST |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 468 | int tstFlags = 0; /* Value for testflags=N parameter */ |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 469 | #endif |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 470 | int nCol = -99; /* Value of the columns= parameter */ |
| 471 | CsvReader sRdr; /* A CSV file reader used to store an error |
| 472 | ** message and/or to count the number of columns */ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 473 | static const char *azParam[] = { |
| 474 | "filename", "data", "schema", |
| 475 | }; |
| 476 | char *azPValue[3]; /* Parameter values */ |
| 477 | # define CSV_FILENAME (azPValue[0]) |
| 478 | # define CSV_DATA (azPValue[1]) |
| 479 | # define CSV_SCHEMA (azPValue[2]) |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 480 | |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 481 | |
| 482 | assert( sizeof(azPValue)==sizeof(azParam) ); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 483 | memset(&sRdr, 0, sizeof(sRdr)); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 484 | memset(azPValue, 0, sizeof(azPValue)); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 485 | for(i=3; i<argc; i++){ |
| 486 | const char *z = argv[i]; |
| 487 | const char *zValue; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 488 | for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ |
| 489 | if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; |
| 490 | } |
| 491 | if( j<sizeof(azParam)/sizeof(azParam[0]) ){ |
| 492 | if( sRdr.zErr[0] ) goto csvtab_connect_error; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 493 | }else |
| 494 | if( (zValue = csv_parameter("header",6,z))!=0 ){ |
| 495 | int x; |
| 496 | if( bHeader>=0 ){ |
| 497 | csv_errmsg(&sRdr, "more than one 'header' parameter"); |
| 498 | goto csvtab_connect_error; |
| 499 | } |
| 500 | x = csv_boolean(zValue); |
| 501 | if( x==1 ){ |
| 502 | bHeader = 1; |
| 503 | }else if( x==0 ){ |
| 504 | bHeader = 0; |
| 505 | }else{ |
| 506 | csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); |
| 507 | goto csvtab_connect_error; |
| 508 | } |
| 509 | }else |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 510 | #ifdef SQLITE_TEST |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 511 | if( (zValue = csv_parameter("testflags",9,z))!=0 ){ |
| 512 | tstFlags = (unsigned int)atoi(zValue); |
| 513 | }else |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 514 | #endif |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 515 | if( (zValue = csv_parameter("columns",7,z))!=0 ){ |
| 516 | if( nCol>0 ){ |
| 517 | csv_errmsg(&sRdr, "more than one 'columns' parameter"); |
| 518 | goto csvtab_connect_error; |
| 519 | } |
| 520 | nCol = atoi(zValue); |
| 521 | if( nCol<=0 ){ |
| 522 | csv_errmsg(&sRdr, "must have at least one column"); |
| 523 | goto csvtab_connect_error; |
| 524 | } |
| 525 | }else |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 526 | { |
| 527 | csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); |
| 528 | goto csvtab_connect_error; |
| 529 | } |
| 530 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 531 | if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ |
| 532 | csv_errmsg(&sRdr, "must either filename= or data= but not both"); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 533 | goto csvtab_connect_error; |
| 534 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 535 | if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 536 | goto csvtab_connect_error; |
| 537 | } |
| 538 | pNew = sqlite3_malloc( sizeof(*pNew) ); |
| 539 | *ppVtab = (sqlite3_vtab*)pNew; |
| 540 | if( pNew==0 ) goto csvtab_connect_oom; |
| 541 | memset(pNew, 0, sizeof(*pNew)); |
drh | 1fc1a0f | 2016-05-31 18:44:33 +0000 | [diff] [blame] | 542 | if( nCol>0 ){ |
| 543 | pNew->nCol = nCol; |
| 544 | }else{ |
| 545 | do{ |
| 546 | const char *z = csv_read_one_field(&sRdr); |
| 547 | if( z==0 ) goto csvtab_connect_oom; |
| 548 | pNew->nCol++; |
| 549 | }while( sRdr.cTerm==',' ); |
| 550 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 551 | pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; |
| 552 | pNew->zData = CSV_DATA; CSV_DATA = 0; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 553 | #ifdef SQLITE_TEST |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 554 | pNew->tstFlags = tstFlags; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 555 | #endif |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 556 | pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; |
| 557 | csv_reader_reset(&sRdr); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 558 | if( CSV_SCHEMA==0 ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 559 | char *zSep = ""; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 560 | CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); |
| 561 | if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 562 | for(i=0; i<pNew->nCol; i++){ |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 563 | CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 564 | zSep = ","; |
| 565 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 566 | CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 567 | } |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 568 | rc = sqlite3_declare_vtab(db, CSV_SCHEMA); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 569 | if( rc ) goto csvtab_connect_error; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 570 | for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ |
| 571 | sqlite3_free(azPValue[i]); |
| 572 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 573 | return SQLITE_OK; |
| 574 | |
| 575 | csvtab_connect_oom: |
| 576 | rc = SQLITE_NOMEM; |
| 577 | csv_errmsg(&sRdr, "out of memory"); |
| 578 | |
| 579 | csvtab_connect_error: |
| 580 | if( pNew ) csvtabDisconnect(&pNew->base); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 581 | for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ |
| 582 | sqlite3_free(azPValue[i]); |
| 583 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 584 | if( sRdr.zErr[0] ){ |
| 585 | sqlite3_free(*pzErr); |
| 586 | *pzErr = sqlite3_mprintf("%s", sRdr.zErr); |
| 587 | } |
| 588 | csv_reader_reset(&sRdr); |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 589 | if( rc==SQLITE_OK ) rc = SQLITE_ERROR; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 590 | return rc; |
| 591 | } |
| 592 | |
| 593 | /* |
| 594 | ** Reset the current row content held by a CsvCursor. |
| 595 | */ |
| 596 | static void csvtabCursorRowReset(CsvCursor *pCur){ |
| 597 | CsvTable *pTab = (CsvTable*)pCur->base.pVtab; |
| 598 | int i; |
| 599 | for(i=0; i<pTab->nCol; i++){ |
| 600 | sqlite3_free(pCur->azVal[i]); |
| 601 | pCur->azVal[i] = 0; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 602 | pCur->aLen[i] = 0; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 603 | } |
| 604 | } |
| 605 | |
| 606 | /* |
| 607 | ** The xConnect and xCreate methods do the same thing, but they must be |
| 608 | ** different so that the virtual table is not an eponymous virtual table. |
| 609 | */ |
| 610 | static int csvtabCreate( |
| 611 | sqlite3 *db, |
| 612 | void *pAux, |
| 613 | int argc, const char *const*argv, |
| 614 | sqlite3_vtab **ppVtab, |
| 615 | char **pzErr |
| 616 | ){ |
| 617 | return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); |
| 618 | } |
| 619 | |
| 620 | /* |
| 621 | ** Destructor for a CsvCursor. |
| 622 | */ |
| 623 | static int csvtabClose(sqlite3_vtab_cursor *cur){ |
| 624 | CsvCursor *pCur = (CsvCursor*)cur; |
| 625 | csvtabCursorRowReset(pCur); |
| 626 | csv_reader_reset(&pCur->rdr); |
| 627 | sqlite3_free(cur); |
| 628 | return SQLITE_OK; |
| 629 | } |
| 630 | |
| 631 | /* |
| 632 | ** Constructor for a new CsvTable cursor object. |
| 633 | */ |
| 634 | static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ |
| 635 | CsvTable *pTab = (CsvTable*)p; |
| 636 | CsvCursor *pCur; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 637 | size_t nByte; |
| 638 | nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; |
drh | 11499f0 | 2016-07-09 16:38:25 +0000 | [diff] [blame] | 639 | pCur = sqlite3_malloc64( nByte ); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 640 | if( pCur==0 ) return SQLITE_NOMEM; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 641 | memset(pCur, 0, nByte); |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 642 | pCur->azVal = (char**)&pCur[1]; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 643 | pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 644 | *ppCursor = &pCur->base; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 645 | if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 646 | csv_xfer_error(pTab, &pCur->rdr); |
| 647 | return SQLITE_ERROR; |
| 648 | } |
| 649 | return SQLITE_OK; |
| 650 | } |
| 651 | |
| 652 | |
| 653 | /* |
| 654 | ** Advance a CsvCursor to its next row of input. |
| 655 | ** Set the EOF marker if we reach the end of input. |
| 656 | */ |
| 657 | static int csvtabNext(sqlite3_vtab_cursor *cur){ |
| 658 | CsvCursor *pCur = (CsvCursor*)cur; |
| 659 | CsvTable *pTab = (CsvTable*)cur->pVtab; |
| 660 | int i = 0; |
| 661 | char *z; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 662 | do{ |
| 663 | z = csv_read_one_field(&pCur->rdr); |
| 664 | if( z==0 ){ |
| 665 | csv_xfer_error(pTab, &pCur->rdr); |
| 666 | break; |
| 667 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 668 | if( i<pTab->nCol ){ |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 669 | if( pCur->aLen[i] < pCur->rdr.n+1 ){ |
drh | 11499f0 | 2016-07-09 16:38:25 +0000 | [diff] [blame] | 670 | char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1); |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 671 | if( zNew==0 ){ |
| 672 | csv_errmsg(&pCur->rdr, "out of memory"); |
| 673 | csv_xfer_error(pTab, &pCur->rdr); |
| 674 | break; |
| 675 | } |
| 676 | pCur->azVal[i] = zNew; |
| 677 | pCur->aLen[i] = pCur->rdr.n+1; |
| 678 | } |
| 679 | memcpy(pCur->azVal[i], z, pCur->rdr.n+1); |
| 680 | i++; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 681 | } |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 682 | }while( pCur->rdr.cTerm==',' ); |
drh | 4f57352 | 2017-08-08 20:03:10 +0000 | [diff] [blame] | 683 | if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){ |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 684 | pCur->iRowid = -1; |
| 685 | }else{ |
| 686 | pCur->iRowid++; |
drh | 4f57352 | 2017-08-08 20:03:10 +0000 | [diff] [blame] | 687 | while( i<pTab->nCol ){ |
| 688 | sqlite3_free(pCur->azVal[i]); |
| 689 | pCur->azVal[i] = 0; |
| 690 | pCur->aLen[i] = 0; |
| 691 | i++; |
| 692 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 693 | } |
| 694 | return SQLITE_OK; |
| 695 | } |
| 696 | |
| 697 | /* |
| 698 | ** Return values of columns for the row at which the CsvCursor |
| 699 | ** is currently pointing. |
| 700 | */ |
| 701 | static int csvtabColumn( |
| 702 | sqlite3_vtab_cursor *cur, /* The cursor */ |
| 703 | sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ |
| 704 | int i /* Which column to return */ |
| 705 | ){ |
| 706 | CsvCursor *pCur = (CsvCursor*)cur; |
| 707 | CsvTable *pTab = (CsvTable*)cur->pVtab; |
| 708 | if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ |
| 709 | sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); |
| 710 | } |
| 711 | return SQLITE_OK; |
| 712 | } |
| 713 | |
| 714 | /* |
| 715 | ** Return the rowid for the current row. |
| 716 | */ |
| 717 | static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ |
| 718 | CsvCursor *pCur = (CsvCursor*)cur; |
| 719 | *pRowid = pCur->iRowid; |
| 720 | return SQLITE_OK; |
| 721 | } |
| 722 | |
| 723 | /* |
| 724 | ** Return TRUE if the cursor has been moved off of the last |
| 725 | ** row of output. |
| 726 | */ |
| 727 | static int csvtabEof(sqlite3_vtab_cursor *cur){ |
| 728 | CsvCursor *pCur = (CsvCursor*)cur; |
| 729 | return pCur->iRowid<0; |
| 730 | } |
| 731 | |
| 732 | /* |
| 733 | ** Only a full table scan is supported. So xFilter simply rewinds to |
| 734 | ** the beginning. |
| 735 | */ |
| 736 | static int csvtabFilter( |
| 737 | sqlite3_vtab_cursor *pVtabCursor, |
| 738 | int idxNum, const char *idxStr, |
| 739 | int argc, sqlite3_value **argv |
| 740 | ){ |
| 741 | CsvCursor *pCur = (CsvCursor*)pVtabCursor; |
| 742 | CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; |
| 743 | pCur->iRowid = 0; |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 744 | if( pCur->rdr.in==0 ){ |
| 745 | assert( pCur->rdr.zIn==pTab->zData ); |
mistachkin | 80f2b33 | 2016-07-22 21:26:56 +0000 | [diff] [blame] | 746 | assert( pTab->iStart>=0 ); |
| 747 | assert( (size_t)pTab->iStart<=pCur->rdr.nIn ); |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 748 | pCur->rdr.iIn = pTab->iStart; |
| 749 | }else{ |
| 750 | fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); |
| 751 | pCur->rdr.iIn = 0; |
| 752 | pCur->rdr.nIn = 0; |
| 753 | } |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 754 | return csvtabNext(pVtabCursor); |
| 755 | } |
| 756 | |
| 757 | /* |
drh | adcba64 | 2016-06-02 17:44:24 +0000 | [diff] [blame] | 758 | ** Only a forward full table scan is supported. xBestIndex is mostly |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 759 | ** a no-op. If CSVTEST_FIDX is set, then the presence of equality |
| 760 | ** constraints lowers the estimated cost, which is fiction, but is useful |
| 761 | ** for testing certain kinds of virtual table behavior. |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 762 | */ |
| 763 | static int csvtabBestIndex( |
| 764 | sqlite3_vtab *tab, |
| 765 | sqlite3_index_info *pIdxInfo |
| 766 | ){ |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 767 | pIdxInfo->estimatedCost = 1000000; |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 768 | #ifdef SQLITE_TEST |
| 769 | if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ |
| 770 | /* The usual (and sensible) case is to always do a full table scan. |
| 771 | ** The code in this branch only runs when testflags=1. This code |
| 772 | ** generates an artifical and unrealistic plan which is useful |
| 773 | ** for testing virtual table logic but is not helpful to real applications. |
| 774 | ** |
| 775 | ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual |
| 776 | ** table (even though it is not) and the cost of running the virtual table |
| 777 | ** is reduced from 1 million to just 10. The constraints are *not* marked |
| 778 | ** as omittable, however, so the query planner should still generate a |
| 779 | ** plan that gives a correct answer, even if they plan is not optimal. |
| 780 | */ |
| 781 | int i; |
| 782 | int nConst = 0; |
| 783 | for(i=0; i<pIdxInfo->nConstraint; i++){ |
| 784 | unsigned char op; |
| 785 | if( pIdxInfo->aConstraint[i].usable==0 ) continue; |
| 786 | op = pIdxInfo->aConstraint[i].op; |
| 787 | if( op==SQLITE_INDEX_CONSTRAINT_EQ |
| 788 | || op==SQLITE_INDEX_CONSTRAINT_LIKE |
| 789 | || op==SQLITE_INDEX_CONSTRAINT_GLOB |
| 790 | ){ |
| 791 | pIdxInfo->estimatedCost = 10; |
| 792 | pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; |
| 793 | nConst++; |
| 794 | } |
drh | abfd272 | 2016-05-31 18:08:35 +0000 | [diff] [blame] | 795 | } |
| 796 | } |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 797 | #endif |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 798 | return SQLITE_OK; |
| 799 | } |
| 800 | |
| 801 | |
| 802 | static sqlite3_module CsvModule = { |
| 803 | 0, /* iVersion */ |
| 804 | csvtabCreate, /* xCreate */ |
| 805 | csvtabConnect, /* xConnect */ |
| 806 | csvtabBestIndex, /* xBestIndex */ |
| 807 | csvtabDisconnect, /* xDisconnect */ |
| 808 | csvtabDisconnect, /* xDestroy */ |
| 809 | csvtabOpen, /* xOpen - open a cursor */ |
| 810 | csvtabClose, /* xClose - close a cursor */ |
| 811 | csvtabFilter, /* xFilter - configure scan constraints */ |
| 812 | csvtabNext, /* xNext - advance a cursor */ |
| 813 | csvtabEof, /* xEof - check for end of scan */ |
| 814 | csvtabColumn, /* xColumn - read data */ |
| 815 | csvtabRowid, /* xRowid - read data */ |
| 816 | 0, /* xUpdate */ |
| 817 | 0, /* xBegin */ |
| 818 | 0, /* xSync */ |
| 819 | 0, /* xCommit */ |
| 820 | 0, /* xRollback */ |
| 821 | 0, /* xFindMethod */ |
| 822 | 0, /* xRename */ |
| 823 | }; |
| 824 | |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 825 | #ifdef SQLITE_TEST |
| 826 | /* |
| 827 | ** For virtual table testing, make a version of the CSV virtual table |
| 828 | ** available that has an xUpdate function. But the xUpdate always returns |
| 829 | ** SQLITE_READONLY since the CSV file is not really writable. |
| 830 | */ |
| 831 | static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){ |
| 832 | return SQLITE_READONLY; |
| 833 | } |
| 834 | static sqlite3_module CsvModuleFauxWrite = { |
| 835 | 0, /* iVersion */ |
| 836 | csvtabCreate, /* xCreate */ |
| 837 | csvtabConnect, /* xConnect */ |
| 838 | csvtabBestIndex, /* xBestIndex */ |
| 839 | csvtabDisconnect, /* xDisconnect */ |
| 840 | csvtabDisconnect, /* xDestroy */ |
| 841 | csvtabOpen, /* xOpen - open a cursor */ |
| 842 | csvtabClose, /* xClose - close a cursor */ |
| 843 | csvtabFilter, /* xFilter - configure scan constraints */ |
| 844 | csvtabNext, /* xNext - advance a cursor */ |
| 845 | csvtabEof, /* xEof - check for end of scan */ |
| 846 | csvtabColumn, /* xColumn - read data */ |
| 847 | csvtabRowid, /* xRowid - read data */ |
| 848 | csvtabUpdate, /* xUpdate */ |
| 849 | 0, /* xBegin */ |
| 850 | 0, /* xSync */ |
| 851 | 0, /* xCommit */ |
| 852 | 0, /* xRollback */ |
| 853 | 0, /* xFindMethod */ |
| 854 | 0, /* xRename */ |
| 855 | }; |
| 856 | #endif /* SQLITE_TEST */ |
| 857 | |
drh | eb5a549 | 2016-07-15 02:50:18 +0000 | [diff] [blame] | 858 | #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 859 | |
| 860 | |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 861 | #ifdef _WIN32 |
| 862 | __declspec(dllexport) |
| 863 | #endif |
| 864 | /* |
| 865 | ** This routine is called when the extension is loaded. The new |
| 866 | ** CSV virtual table module is registered with the calling database |
| 867 | ** connection. |
| 868 | */ |
| 869 | int sqlite3_csv_init( |
| 870 | sqlite3 *db, |
| 871 | char **pzErrMsg, |
| 872 | const sqlite3_api_routines *pApi |
| 873 | ){ |
drh | eb5a549 | 2016-07-15 02:50:18 +0000 | [diff] [blame] | 874 | #ifndef SQLITE_OMIT_VIRTUALTABLE |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 875 | int rc; |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 876 | SQLITE_EXTENSION_INIT2(pApi); |
drh | ac9c3d2 | 2016-06-03 01:01:57 +0000 | [diff] [blame] | 877 | rc = sqlite3_create_module(db, "csv", &CsvModule, 0); |
| 878 | #ifdef SQLITE_TEST |
| 879 | if( rc==SQLITE_OK ){ |
| 880 | rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); |
| 881 | } |
| 882 | #endif |
| 883 | return rc; |
drh | eb5a549 | 2016-07-15 02:50:18 +0000 | [diff] [blame] | 884 | #else |
| 885 | return SQLITE_OK; |
| 886 | #endif |
drh | 724b189 | 2016-05-31 16:22:48 +0000 | [diff] [blame] | 887 | } |