blob: 3a7e32d3117bb7db517d8fc13a7feeda20054ae0 [file] [log] [blame]
drh724b1892016-05-31 16:22:48 +00001/*
2** 2016-05-28
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains the implementation of an SQLite virtual table for
14** reading CSV files.
15**
16** Usage:
17**
18** .load ./csv
19** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20** SELECT * FROM csv;
21**
** The columns are named "c0", "c1", "c2", ... by default. But the
23** application can define its own CREATE TABLE statement as an additional
24** parameter. For example:
25**
26** CREATE VIRTUAL TABLE temp.csv2 USING csv(
27** filename = "../http.log",
28** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29** );
drhac9c3d22016-06-03 01:01:57 +000030**
31** Instead of specifying a file, the text of the CSV can be loaded using
32** the data= parameter.
33**
34** If the columns=N parameter is supplied, then the CSV file is assumed to have
35** N columns. If the columns parameter is omitted, the CSV file is opened
36** as soon as the virtual table is constructed and the first row of the CSV
** is read in order to count the columns.
38**
39** Some extra debugging features (used for testing virtual tables) are available
40** if this module is compiled with -DSQLITE_TEST.
drh724b1892016-05-31 16:22:48 +000041*/
42#include <sqlite3ext.h>
43SQLITE_EXTENSION_INIT1
44#include <string.h>
45#include <stdlib.h>
46#include <assert.h>
47#include <stdarg.h>
48#include <ctype.h>
49#include <stdio.h>
50
/*
** CSV_NOINLINE is placed on functions that the compiler should keep
** out of line.  It expands to a noinline attribute when __GNUC__ is
** defined, to a noinline declspec for _MSC_VER 1310 and later, and to
** nothing for every other compiler.
*/
#if defined(__GNUC__)
#  define CSV_NOINLINE  __attribute__((noinline))
#elif defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
#else
#  define CSV_NOINLINE
#endif
62
63
/* Capacity, in bytes, of the error message buffer inside a CsvReader */
#define CSV_MXERR   200

/* Number of bytes requested by each fread() into the CsvReader input
** buffer */
#define CSV_INBUFSZ 1024
69
drh724b1892016-05-31 16:22:48 +000070/* A context object used when read a CSV file. */
71typedef struct CsvReader CsvReader;
72struct CsvReader {
73 FILE *in; /* Read the CSV text from this input stream */
74 char *z; /* Accumulated text for a field */
75 int n; /* Number of bytes in z */
76 int nAlloc; /* Space allocated for z[] */
77 int nLine; /* Current line number */
drhadcba642016-06-02 17:44:24 +000078 char cTerm; /* Character that terminated the most recent field */
79 size_t iIn; /* Next unread character in the input buffer */
80 size_t nIn; /* Number of characters in the input buffer */
81 char *zIn; /* The input buffer */
drh724b1892016-05-31 16:22:48 +000082 char zErr[CSV_MXERR]; /* Error message */
83};
84
85/* Initialize a CsvReader object */
86static void csv_reader_init(CsvReader *p){
drhadcba642016-06-02 17:44:24 +000087 p->in = 0;
88 p->z = 0;
89 p->n = 0;
90 p->nAlloc = 0;
91 p->nLine = 0;
92 p->nIn = 0;
93 p->zIn = 0;
94 p->zErr[0] = 0;
drh724b1892016-05-31 16:22:48 +000095}
96
97/* Close and reset a CsvReader object */
98static void csv_reader_reset(CsvReader *p){
drhadcba642016-06-02 17:44:24 +000099 if( p->in ){
100 fclose(p->in);
101 sqlite3_free(p->zIn);
102 }
drh724b1892016-05-31 16:22:48 +0000103 sqlite3_free(p->z);
104 csv_reader_init(p);
105}
106
107/* Report an error on a CsvReader */
108static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
109 va_list ap;
110 va_start(ap, zFormat);
111 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
112 va_end(ap);
113}
114
115/* Open the file associated with a CsvReader
116** Return the number of errors.
117*/
drhadcba642016-06-02 17:44:24 +0000118static int csv_reader_open(
119 CsvReader *p, /* The reader to open */
120 const char *zFilename, /* Read from this filename */
121 const char *zData /* ... or use this data */
122){
123 if( zFilename ){
124 p->zIn = sqlite3_malloc( CSV_INBUFSZ );
125 if( p->zIn==0 ){
126 csv_errmsg(p, "out of memory");
127 return 1;
128 }
129 p->in = fopen(zFilename, "rb");
130 if( p->in==0 ){
131 csv_reader_reset(p);
132 csv_errmsg(p, "cannot open '%s' for reading", zFilename);
133 return 1;
134 }
135 }else{
136 assert( p->in==0 );
137 p->zIn = (char*)zData;
138 p->nIn = strlen(zData);
drh724b1892016-05-31 16:22:48 +0000139 }
140 return 0;
141}
142
drhadcba642016-06-02 17:44:24 +0000143/* The input buffer has overflowed. Refill the input buffer, then
144** return the next character
145*/
146static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
147 size_t got;
148
149 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
150 assert( p->in!=0 ); /* Only called if reading froma file */
151
152 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
153 if( got==0 ) return EOF;
154 p->nIn = got;
155 p->iIn = 1;
156 return p->zIn[0];
157}
158
159/* Return the next character of input. Return EOF at end of input. */
160static int csv_getc(CsvReader *p){
161 if( p->iIn >= p->nIn ){
162 if( p->in!=0 ) return csv_getc_refill(p);
163 return EOF;
164 }
165 return p->zIn[p->iIn++];
166}
167
drh724b1892016-05-31 16:22:48 +0000168/* Increase the size of p->z and append character c to the end.
169** Return 0 on success and non-zero if there is an OOM error */
170static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
171 char *zNew;
172 int nNew = p->nAlloc*2 + 100;
173 zNew = sqlite3_realloc64(p->z, nNew);
174 if( zNew ){
175 p->z = zNew;
176 p->nAlloc = nNew;
177 p->z[p->n++] = c;
178 return 0;
179 }else{
180 csv_errmsg(p, "out of memory");
181 return 1;
182 }
183}
184
185/* Append a single character to the CsvReader.z[] array.
186** Return 0 on success and non-zero if there is an OOM error */
187static int csv_append(CsvReader *p, char c){
188 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
189 p->z[p->n++] = c;
190 return 0;
191}
192
193/* Read a single field of CSV text. Compatible with rfc4180 and extended
194** with the option of having a separator other than ",".
195**
196** + Input comes from p->in.
197** + Store results in p->z of length p->n. Space to hold p->z comes
198** from sqlite3_malloc64().
199** + Keep track of the line number in p->nLine.
200** + Store the character that terminates the field in p->cTerm. Store
201** EOF on end-of-file.
202**
203** Return "" at EOF. Return 0 on an OOM error.
204*/
205static char *csv_read_one_field(CsvReader *p){
206 int c;
207 p->n = 0;
drhadcba642016-06-02 17:44:24 +0000208 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000209 if( c==EOF ){
210 p->cTerm = EOF;
211 return "";
212 }
213 if( c=='"' ){
214 int pc, ppc;
215 int startLine = p->nLine;
drh724b1892016-05-31 16:22:48 +0000216 pc = ppc = 0;
217 while( 1 ){
drhadcba642016-06-02 17:44:24 +0000218 c = csv_getc(p);
drhac9c3d22016-06-03 01:01:57 +0000219 if( c<='"' || pc=='"' ){
220 if( c=='\n' ) p->nLine++;
221 if( c=='"' ){
222 if( pc=='"' ){
223 pc = 0;
224 continue;
225 }
drh724b1892016-05-31 16:22:48 +0000226 }
drhac9c3d22016-06-03 01:01:57 +0000227 if( (c==',' && pc=='"')
228 || (c=='\n' && pc=='"')
229 || (c=='\n' && pc=='\r' && ppc=='"')
230 || (c==EOF && pc=='"')
231 ){
232 do{ p->n--; }while( p->z[p->n]!='"' );
233 p->cTerm = c;
234 break;
235 }
236 if( pc=='"' && c!='\r' ){
237 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
238 break;
239 }
240 if( c==EOF ){
241 csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
242 startLine, '"');
243 p->cTerm = c;
244 break;
245 }
drh724b1892016-05-31 16:22:48 +0000246 }
247 if( csv_append(p, (char)c) ) return 0;
248 ppc = pc;
249 pc = c;
250 }
251 }else{
drhac9c3d22016-06-03 01:01:57 +0000252 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
drh724b1892016-05-31 16:22:48 +0000253 if( csv_append(p, (char)c) ) return 0;
drhadcba642016-06-02 17:44:24 +0000254 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000255 }
256 if( c=='\n' ){
257 p->nLine++;
258 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
259 }
260 p->cTerm = c;
261 }
262 if( p->z ) p->z[p->n] = 0;
263 return p->z;
264}
265
266
267/* Forward references to the various virtual table methods implemented
268** in this file. */
269static int csvtabCreate(sqlite3*, void*, int, const char*const*,
270 sqlite3_vtab**,char**);
271static int csvtabConnect(sqlite3*, void*, int, const char*const*,
272 sqlite3_vtab**,char**);
273static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
274static int csvtabDisconnect(sqlite3_vtab*);
275static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
276static int csvtabClose(sqlite3_vtab_cursor*);
277static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
278 int argc, sqlite3_value **argv);
279static int csvtabNext(sqlite3_vtab_cursor*);
280static int csvtabEof(sqlite3_vtab_cursor*);
281static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
282static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
283
284/* An instance of the CSV virtual table */
285typedef struct CsvTable {
286 sqlite3_vtab base; /* Base class. Must be first */
287 char *zFilename; /* Name of the CSV file */
drhadcba642016-06-02 17:44:24 +0000288 char *zData; /* Raw CSV data in lieu of zFilename */
drh724b1892016-05-31 16:22:48 +0000289 long iStart; /* Offset to start of data in zFilename */
290 int nCol; /* Number of columns in the CSV file */
drhabfd2722016-05-31 18:08:35 +0000291 unsigned int tstFlags; /* Bit values used for testing */
drh724b1892016-05-31 16:22:48 +0000292} CsvTable;
293
drhabfd2722016-05-31 18:08:35 +0000294/* Allowed values for tstFlags */
295#define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
296
drh724b1892016-05-31 16:22:48 +0000297/* A cursor for the CSV virtual table */
298typedef struct CsvCursor {
299 sqlite3_vtab_cursor base; /* Base class. Must be first */
300 CsvReader rdr; /* The CsvReader object */
301 char **azVal; /* Value of the current row */
drhac9c3d22016-06-03 01:01:57 +0000302 int *aLen; /* Length of each entry */
drh724b1892016-05-31 16:22:48 +0000303 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
304} CsvCursor;
305
306/* Transfer error message text from a reader into a CsvTable */
307static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
308 sqlite3_free(pTab->base.zErrMsg);
309 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
310}
311
312/*
313** This method is the destructor fo a CsvTable object.
314*/
315static int csvtabDisconnect(sqlite3_vtab *pVtab){
316 CsvTable *p = (CsvTable*)pVtab;
317 sqlite3_free(p->zFilename);
drh35db31b2016-06-02 23:13:21 +0000318 sqlite3_free(p->zData);
drh724b1892016-05-31 16:22:48 +0000319 sqlite3_free(p);
320 return SQLITE_OK;
321}
322
/*
** Step over whitespace at the start of z.  The return value points at
** the first character for which isspace() is false, which is the zero
** terminator if the string consists only of whitespace.
*/
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zCur = (const unsigned char*)z;
  while( isspace(*zCur) ){
    zCur++;
  }
  return (const char*)zCur;
}
329
/*
** Remove trailing whitespace from the end of string z[], in place.
**
** The character examined is z[n-1], the last character of the remaining
** text.  z[n] is the zero terminator, which is never whitespace, so
** testing it would stop the loop before anything was trimmed.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
336
/*
** Remove the quoting from string z[], in place.
**
** The string is changed only if it starts with a single-quote or a
** double-quote, is at least two bytes long, and ends with the same
** quote character.  The enclosing quotes are dropped and each doubled
** quote character inside is reduced to one.
*/
static void csv_dequote(char *z){
  const char cQuote = z[0];
  size_t nByte;
  size_t iFrom;
  size_t iTo = 0;

  if( cQuote!='\'' && cQuote!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=cQuote ) return;
  for(iFrom=1; iFrom<nByte-1; iFrom++){
    if( z[iFrom]==cQuote && z[iFrom+1]==cQuote ){
      iFrom++;
    }
    z[iTo] = z[iFrom];
    iTo++;
  }
  z[iTo] = 0;
}
352
/*
** Test whether z has the form "TAG = VALUE", where TAG is the first
** nTag bytes of zTag and whitespace may appear before TAG and on either
** side of the "=".  Return a pointer to the first character of VALUE
** on a match, or NULL if z does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur+1);
    }
  }
  return 0;
}
364
drhadcba642016-06-02 17:44:24 +0000365/* Decode a parameter that requires a dequoted string.
366**
367** Return 1 if the parameter is seen, or 0 if not. 1 is returned
368** even if there is an error. If an error occurs, then an error message
369** is left in p->zErr. If there are no errors, p->zErr[0]==0.
370*/
371static int csv_string_parameter(
372 CsvReader *p, /* Leave the error message here, if there is one */
373 const char *zParam, /* Parameter we are checking for */
374 const char *zArg, /* Raw text of the virtual table argment */
375 char **pzVal /* Write the dequoted string value here */
376){
377 const char *zValue;
drh11499f02016-07-09 16:38:25 +0000378 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
drhadcba642016-06-02 17:44:24 +0000379 if( zValue==0 ) return 0;
380 p->zErr[0] = 0;
381 if( *pzVal ){
382 csv_errmsg(p, "more than one '%s' parameter", zParam);
383 return 1;
384 }
385 *pzVal = sqlite3_mprintf("%s", zValue);
386 if( *pzVal==0 ){
387 csv_errmsg(p, "out of memory");
388 return 1;
389 }
390 csv_trim_whitespace(*pzVal);
391 csv_dequote(*pzVal);
392 return 1;
393}
394
395
/*
** Interpret string z as a boolean.
**
** Return 1 for "yes", "on", "true" (compared without regard to case)
** or "1".  Return 0 for "no", "off", "false" or "0".  Return -1 if z
** is none of these.
*/
static int csv_boolean(const char *z){
  if( sqlite3_stricmp("yes",z)==0
   || sqlite3_stricmp("on",z)==0
   || sqlite3_stricmp("true",z)==0
   || (z[0]=='1' && z[1]==0)   /* Exactly "1": check the byte after it */
  ){
    return 1;
  }
  if( sqlite3_stricmp("no",z)==0
   || sqlite3_stricmp("off",z)==0
   || sqlite3_stricmp("false",z)==0
   || (z[0]=='0' && z[1]==0)
  ){
    return 0;
  }
  return -1;
}
416
417
418/*
419** Parameters:
drhadcba642016-06-02 17:44:24 +0000420** filename=FILENAME Name of file containing CSV content
421** data=TEXT Direct CSV content.
drh1fc1a0f2016-05-31 18:44:33 +0000422** schema=SCHEMA Alternative CSV schema.
drh724b1892016-05-31 16:22:48 +0000423** header=YES|NO First row of CSV defines the names of
424** columns if "yes". Default "no".
drhadcba642016-06-02 17:44:24 +0000425** columns=N Assume the CSV file contains N columns.
drhac9c3d22016-06-03 01:01:57 +0000426**
427** Only available if compiled with SQLITE_TEST:
428**
drhabfd2722016-05-31 18:08:35 +0000429** testflags=N Bitmask of test flags. Optional
drh724b1892016-05-31 16:22:48 +0000430**
drh1fc1a0f2016-05-31 18:44:33 +0000431** If schema= is omitted, then the columns are named "c0", "c1", "c2",
432** and so forth. If columns=N is omitted, then the file is opened and
433** the number of columns in the first row is counted to determine the
434** column count. If header=YES, then the first row is skipped.
drh724b1892016-05-31 16:22:48 +0000435*/
436static int csvtabConnect(
437 sqlite3 *db,
438 void *pAux,
439 int argc, const char *const*argv,
440 sqlite3_vtab **ppVtab,
441 char **pzErr
442){
drh1fc1a0f2016-05-31 18:44:33 +0000443 CsvTable *pNew = 0; /* The CsvTable object to construct */
444 int bHeader = -1; /* header= flags. -1 means not seen yet */
445 int rc = SQLITE_OK; /* Result code from this routine */
drhadcba642016-06-02 17:44:24 +0000446 int i, j; /* Loop counters */
drhac9c3d22016-06-03 01:01:57 +0000447#ifdef SQLITE_TEST
drhadcba642016-06-02 17:44:24 +0000448 int tstFlags = 0; /* Value for testflags=N parameter */
drhac9c3d22016-06-03 01:01:57 +0000449#endif
drh1fc1a0f2016-05-31 18:44:33 +0000450 int nCol = -99; /* Value of the columns= parameter */
451 CsvReader sRdr; /* A CSV file reader used to store an error
452 ** message and/or to count the number of columns */
drhadcba642016-06-02 17:44:24 +0000453 static const char *azParam[] = {
454 "filename", "data", "schema",
455 };
456 char *azPValue[3]; /* Parameter values */
457# define CSV_FILENAME (azPValue[0])
458# define CSV_DATA (azPValue[1])
459# define CSV_SCHEMA (azPValue[2])
drh724b1892016-05-31 16:22:48 +0000460
drhadcba642016-06-02 17:44:24 +0000461
462 assert( sizeof(azPValue)==sizeof(azParam) );
drh724b1892016-05-31 16:22:48 +0000463 memset(&sRdr, 0, sizeof(sRdr));
drhadcba642016-06-02 17:44:24 +0000464 memset(azPValue, 0, sizeof(azPValue));
drh724b1892016-05-31 16:22:48 +0000465 for(i=3; i<argc; i++){
466 const char *z = argv[i];
467 const char *zValue;
drhadcba642016-06-02 17:44:24 +0000468 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
469 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
470 }
471 if( j<sizeof(azParam)/sizeof(azParam[0]) ){
472 if( sRdr.zErr[0] ) goto csvtab_connect_error;
drh724b1892016-05-31 16:22:48 +0000473 }else
474 if( (zValue = csv_parameter("header",6,z))!=0 ){
475 int x;
476 if( bHeader>=0 ){
477 csv_errmsg(&sRdr, "more than one 'header' parameter");
478 goto csvtab_connect_error;
479 }
480 x = csv_boolean(zValue);
481 if( x==1 ){
482 bHeader = 1;
483 }else if( x==0 ){
484 bHeader = 0;
485 }else{
486 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
487 goto csvtab_connect_error;
488 }
489 }else
drhac9c3d22016-06-03 01:01:57 +0000490#ifdef SQLITE_TEST
drhabfd2722016-05-31 18:08:35 +0000491 if( (zValue = csv_parameter("testflags",9,z))!=0 ){
492 tstFlags = (unsigned int)atoi(zValue);
493 }else
drhac9c3d22016-06-03 01:01:57 +0000494#endif
drh1fc1a0f2016-05-31 18:44:33 +0000495 if( (zValue = csv_parameter("columns",7,z))!=0 ){
496 if( nCol>0 ){
497 csv_errmsg(&sRdr, "more than one 'columns' parameter");
498 goto csvtab_connect_error;
499 }
500 nCol = atoi(zValue);
501 if( nCol<=0 ){
502 csv_errmsg(&sRdr, "must have at least one column");
503 goto csvtab_connect_error;
504 }
505 }else
drh724b1892016-05-31 16:22:48 +0000506 {
507 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
508 goto csvtab_connect_error;
509 }
510 }
drhadcba642016-06-02 17:44:24 +0000511 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
512 csv_errmsg(&sRdr, "must either filename= or data= but not both");
drh724b1892016-05-31 16:22:48 +0000513 goto csvtab_connect_error;
514 }
drhadcba642016-06-02 17:44:24 +0000515 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
drh724b1892016-05-31 16:22:48 +0000516 goto csvtab_connect_error;
517 }
518 pNew = sqlite3_malloc( sizeof(*pNew) );
519 *ppVtab = (sqlite3_vtab*)pNew;
520 if( pNew==0 ) goto csvtab_connect_oom;
521 memset(pNew, 0, sizeof(*pNew));
drh1fc1a0f2016-05-31 18:44:33 +0000522 if( nCol>0 ){
523 pNew->nCol = nCol;
524 }else{
525 do{
526 const char *z = csv_read_one_field(&sRdr);
527 if( z==0 ) goto csvtab_connect_oom;
528 pNew->nCol++;
529 }while( sRdr.cTerm==',' );
530 }
drhadcba642016-06-02 17:44:24 +0000531 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
532 pNew->zData = CSV_DATA; CSV_DATA = 0;
drhac9c3d22016-06-03 01:01:57 +0000533#ifdef SQLITE_TEST
drhabfd2722016-05-31 18:08:35 +0000534 pNew->tstFlags = tstFlags;
drhac9c3d22016-06-03 01:01:57 +0000535#endif
drh724b1892016-05-31 16:22:48 +0000536 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
537 csv_reader_reset(&sRdr);
drhadcba642016-06-02 17:44:24 +0000538 if( CSV_SCHEMA==0 ){
drh724b1892016-05-31 16:22:48 +0000539 char *zSep = "";
drhadcba642016-06-02 17:44:24 +0000540 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
541 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
drh724b1892016-05-31 16:22:48 +0000542 for(i=0; i<pNew->nCol; i++){
drhadcba642016-06-02 17:44:24 +0000543 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
drh724b1892016-05-31 16:22:48 +0000544 zSep = ",";
545 }
drhadcba642016-06-02 17:44:24 +0000546 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000547 }
drhadcba642016-06-02 17:44:24 +0000548 rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000549 if( rc ) goto csvtab_connect_error;
drhadcba642016-06-02 17:44:24 +0000550 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
551 sqlite3_free(azPValue[i]);
552 }
drh724b1892016-05-31 16:22:48 +0000553 return SQLITE_OK;
554
555csvtab_connect_oom:
556 rc = SQLITE_NOMEM;
557 csv_errmsg(&sRdr, "out of memory");
558
559csvtab_connect_error:
560 if( pNew ) csvtabDisconnect(&pNew->base);
drhadcba642016-06-02 17:44:24 +0000561 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
562 sqlite3_free(azPValue[i]);
563 }
drh724b1892016-05-31 16:22:48 +0000564 if( sRdr.zErr[0] ){
565 sqlite3_free(*pzErr);
566 *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
567 }
568 csv_reader_reset(&sRdr);
drhabfd2722016-05-31 18:08:35 +0000569 if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
drh724b1892016-05-31 16:22:48 +0000570 return rc;
571}
572
573/*
574** Reset the current row content held by a CsvCursor.
575*/
576static void csvtabCursorRowReset(CsvCursor *pCur){
577 CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
578 int i;
579 for(i=0; i<pTab->nCol; i++){
580 sqlite3_free(pCur->azVal[i]);
581 pCur->azVal[i] = 0;
drhac9c3d22016-06-03 01:01:57 +0000582 pCur->aLen[i] = 0;
drh724b1892016-05-31 16:22:48 +0000583 }
584}
585
586/*
587** The xConnect and xCreate methods do the same thing, but they must be
588** different so that the virtual table is not an eponymous virtual table.
589*/
590static int csvtabCreate(
591 sqlite3 *db,
592 void *pAux,
593 int argc, const char *const*argv,
594 sqlite3_vtab **ppVtab,
595 char **pzErr
596){
597 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
598}
599
600/*
601** Destructor for a CsvCursor.
602*/
603static int csvtabClose(sqlite3_vtab_cursor *cur){
604 CsvCursor *pCur = (CsvCursor*)cur;
605 csvtabCursorRowReset(pCur);
606 csv_reader_reset(&pCur->rdr);
607 sqlite3_free(cur);
608 return SQLITE_OK;
609}
610
611/*
612** Constructor for a new CsvTable cursor object.
613*/
614static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
615 CsvTable *pTab = (CsvTable*)p;
616 CsvCursor *pCur;
drhac9c3d22016-06-03 01:01:57 +0000617 size_t nByte;
618 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
drh11499f02016-07-09 16:38:25 +0000619 pCur = sqlite3_malloc64( nByte );
drh724b1892016-05-31 16:22:48 +0000620 if( pCur==0 ) return SQLITE_NOMEM;
drhac9c3d22016-06-03 01:01:57 +0000621 memset(pCur, 0, nByte);
drh724b1892016-05-31 16:22:48 +0000622 pCur->azVal = (char**)&pCur[1];
drhac9c3d22016-06-03 01:01:57 +0000623 pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
drh724b1892016-05-31 16:22:48 +0000624 *ppCursor = &pCur->base;
drhadcba642016-06-02 17:44:24 +0000625 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
drh724b1892016-05-31 16:22:48 +0000626 csv_xfer_error(pTab, &pCur->rdr);
627 return SQLITE_ERROR;
628 }
629 return SQLITE_OK;
630}
631
632
633/*
634** Advance a CsvCursor to its next row of input.
635** Set the EOF marker if we reach the end of input.
636*/
637static int csvtabNext(sqlite3_vtab_cursor *cur){
638 CsvCursor *pCur = (CsvCursor*)cur;
639 CsvTable *pTab = (CsvTable*)cur->pVtab;
640 int i = 0;
641 char *z;
drh724b1892016-05-31 16:22:48 +0000642 do{
643 z = csv_read_one_field(&pCur->rdr);
644 if( z==0 ){
645 csv_xfer_error(pTab, &pCur->rdr);
646 break;
647 }
drh724b1892016-05-31 16:22:48 +0000648 if( i<pTab->nCol ){
drhac9c3d22016-06-03 01:01:57 +0000649 if( pCur->aLen[i] < pCur->rdr.n+1 ){
drh11499f02016-07-09 16:38:25 +0000650 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
drhac9c3d22016-06-03 01:01:57 +0000651 if( zNew==0 ){
652 csv_errmsg(&pCur->rdr, "out of memory");
653 csv_xfer_error(pTab, &pCur->rdr);
654 break;
655 }
656 pCur->azVal[i] = zNew;
657 pCur->aLen[i] = pCur->rdr.n+1;
658 }
659 memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
660 i++;
drh724b1892016-05-31 16:22:48 +0000661 }
drhac9c3d22016-06-03 01:01:57 +0000662 }while( pCur->rdr.cTerm==',' );
663 while( i<pTab->nCol ){
664 sqlite3_free(pCur->azVal[i]);
665 pCur->azVal[i] = 0;
666 pCur->aLen[i] = 0;
667 i++;
668 }
drh724b1892016-05-31 16:22:48 +0000669 if( z==0 || pCur->rdr.cTerm==EOF ){
670 pCur->iRowid = -1;
671 }else{
672 pCur->iRowid++;
673 }
674 return SQLITE_OK;
675}
676
677/*
678** Return values of columns for the row at which the CsvCursor
679** is currently pointing.
680*/
681static int csvtabColumn(
682 sqlite3_vtab_cursor *cur, /* The cursor */
683 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
684 int i /* Which column to return */
685){
686 CsvCursor *pCur = (CsvCursor*)cur;
687 CsvTable *pTab = (CsvTable*)cur->pVtab;
688 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
689 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
690 }
691 return SQLITE_OK;
692}
693
694/*
695** Return the rowid for the current row.
696*/
697static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
698 CsvCursor *pCur = (CsvCursor*)cur;
699 *pRowid = pCur->iRowid;
700 return SQLITE_OK;
701}
702
703/*
704** Return TRUE if the cursor has been moved off of the last
705** row of output.
706*/
707static int csvtabEof(sqlite3_vtab_cursor *cur){
708 CsvCursor *pCur = (CsvCursor*)cur;
709 return pCur->iRowid<0;
710}
711
712/*
713** Only a full table scan is supported. So xFilter simply rewinds to
714** the beginning.
715*/
716static int csvtabFilter(
717 sqlite3_vtab_cursor *pVtabCursor,
718 int idxNum, const char *idxStr,
719 int argc, sqlite3_value **argv
720){
721 CsvCursor *pCur = (CsvCursor*)pVtabCursor;
722 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
723 pCur->iRowid = 0;
drhadcba642016-06-02 17:44:24 +0000724 if( pCur->rdr.in==0 ){
725 assert( pCur->rdr.zIn==pTab->zData );
726 assert( pTab->iStart<=pCur->rdr.nIn );
727 pCur->rdr.iIn = pTab->iStart;
728 }else{
729 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
730 pCur->rdr.iIn = 0;
731 pCur->rdr.nIn = 0;
732 }
drh724b1892016-05-31 16:22:48 +0000733 return csvtabNext(pVtabCursor);
734}
735
736/*
drhadcba642016-06-02 17:44:24 +0000737** Only a forward full table scan is supported. xBestIndex is mostly
drhabfd2722016-05-31 18:08:35 +0000738** a no-op. If CSVTEST_FIDX is set, then the presence of equality
739** constraints lowers the estimated cost, which is fiction, but is useful
740** for testing certain kinds of virtual table behavior.
drh724b1892016-05-31 16:22:48 +0000741*/
742static int csvtabBestIndex(
743 sqlite3_vtab *tab,
744 sqlite3_index_info *pIdxInfo
745){
drhabfd2722016-05-31 18:08:35 +0000746 pIdxInfo->estimatedCost = 1000000;
drhac9c3d22016-06-03 01:01:57 +0000747#ifdef SQLITE_TEST
748 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
749 /* The usual (and sensible) case is to always do a full table scan.
750 ** The code in this branch only runs when testflags=1. This code
751 ** generates an artifical and unrealistic plan which is useful
752 ** for testing virtual table logic but is not helpful to real applications.
753 **
754 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
755 ** table (even though it is not) and the cost of running the virtual table
756 ** is reduced from 1 million to just 10. The constraints are *not* marked
757 ** as omittable, however, so the query planner should still generate a
758 ** plan that gives a correct answer, even if they plan is not optimal.
759 */
760 int i;
761 int nConst = 0;
762 for(i=0; i<pIdxInfo->nConstraint; i++){
763 unsigned char op;
764 if( pIdxInfo->aConstraint[i].usable==0 ) continue;
765 op = pIdxInfo->aConstraint[i].op;
766 if( op==SQLITE_INDEX_CONSTRAINT_EQ
767 || op==SQLITE_INDEX_CONSTRAINT_LIKE
768 || op==SQLITE_INDEX_CONSTRAINT_GLOB
769 ){
770 pIdxInfo->estimatedCost = 10;
771 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
772 nConst++;
773 }
drhabfd2722016-05-31 18:08:35 +0000774 }
775 }
drhac9c3d22016-06-03 01:01:57 +0000776#endif
drh724b1892016-05-31 16:22:48 +0000777 return SQLITE_OK;
778}
779
780
781static sqlite3_module CsvModule = {
782 0, /* iVersion */
783 csvtabCreate, /* xCreate */
784 csvtabConnect, /* xConnect */
785 csvtabBestIndex, /* xBestIndex */
786 csvtabDisconnect, /* xDisconnect */
787 csvtabDisconnect, /* xDestroy */
788 csvtabOpen, /* xOpen - open a cursor */
789 csvtabClose, /* xClose - close a cursor */
790 csvtabFilter, /* xFilter - configure scan constraints */
791 csvtabNext, /* xNext - advance a cursor */
792 csvtabEof, /* xEof - check for end of scan */
793 csvtabColumn, /* xColumn - read data */
794 csvtabRowid, /* xRowid - read data */
795 0, /* xUpdate */
796 0, /* xBegin */
797 0, /* xSync */
798 0, /* xCommit */
799 0, /* xRollback */
800 0, /* xFindMethod */
801 0, /* xRename */
802};
803
drhac9c3d22016-06-03 01:01:57 +0000804#ifdef SQLITE_TEST
805/*
806** For virtual table testing, make a version of the CSV virtual table
807** available that has an xUpdate function. But the xUpdate always returns
808** SQLITE_READONLY since the CSV file is not really writable.
809*/
810static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
811 return SQLITE_READONLY;
812}
813static sqlite3_module CsvModuleFauxWrite = {
814 0, /* iVersion */
815 csvtabCreate, /* xCreate */
816 csvtabConnect, /* xConnect */
817 csvtabBestIndex, /* xBestIndex */
818 csvtabDisconnect, /* xDisconnect */
819 csvtabDisconnect, /* xDestroy */
820 csvtabOpen, /* xOpen - open a cursor */
821 csvtabClose, /* xClose - close a cursor */
822 csvtabFilter, /* xFilter - configure scan constraints */
823 csvtabNext, /* xNext - advance a cursor */
824 csvtabEof, /* xEof - check for end of scan */
825 csvtabColumn, /* xColumn - read data */
826 csvtabRowid, /* xRowid - read data */
827 csvtabUpdate, /* xUpdate */
828 0, /* xBegin */
829 0, /* xSync */
830 0, /* xCommit */
831 0, /* xRollback */
832 0, /* xFindMethod */
833 0, /* xRename */
834};
835#endif /* SQLITE_TEST */
836
837
838
drh724b1892016-05-31 16:22:48 +0000839#ifdef _WIN32
840__declspec(dllexport)
841#endif
842/*
843** This routine is called when the extension is loaded. The new
844** CSV virtual table module is registered with the calling database
845** connection.
846*/
847int sqlite3_csv_init(
848 sqlite3 *db,
849 char **pzErrMsg,
850 const sqlite3_api_routines *pApi
851){
drhac9c3d22016-06-03 01:01:57 +0000852 int rc;
drh724b1892016-05-31 16:22:48 +0000853 SQLITE_EXTENSION_INIT2(pApi);
drhac9c3d22016-06-03 01:01:57 +0000854 rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
855#ifdef SQLITE_TEST
856 if( rc==SQLITE_OK ){
857 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
858 }
859#endif
860 return rc;
drh724b1892016-05-31 16:22:48 +0000861}