/*
** 2016-05-28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains the implementation of an SQLite virtual table for
** reading CSV files.
**
** Usage:
**
**    .load ./csv
**    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
**    SELECT * FROM csv;
**
** The columns are named "c0", "c1", "c2", ... by default.  But the
** application can define its own CREATE TABLE statement as an additional
** parameter.  For example:
**
**    CREATE VIRTUAL TABLE temp.csv2 USING csv(
**       filename = "../http.log",
**       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
**    );
*/
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <ctype.h>
#include <stdio.h>
#include <limits.h>

/*
** CSV_NOINLINE asks the compiler to keep a function out of line.  It
** expands to the GCC spelling, to the MSVC spelling (compiler version
** 13.10 or later), or to nothing when neither is recognized.
*/
#ifdef __GNUC__
# define CSV_NOINLINE  __attribute__((noinline))
#else
# if defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
# else
#  define CSV_NOINLINE
# endif
#endif

/* Capacity, in bytes, of the error message buffer inside a CsvReader */
#define CSV_MXERR 200

/* Capacity, in bytes, of the input buffer used when reading from a file */
#define CSV_INBUFSZ 1024

drh724b1892016-05-31 16:22:48 +000059/* A context object used when read a CSV file. */
60typedef struct CsvReader CsvReader;
61struct CsvReader {
62 FILE *in; /* Read the CSV text from this input stream */
63 char *z; /* Accumulated text for a field */
64 int n; /* Number of bytes in z */
65 int nAlloc; /* Space allocated for z[] */
66 int nLine; /* Current line number */
drhadcba642016-06-02 17:44:24 +000067 char cTerm; /* Character that terminated the most recent field */
68 size_t iIn; /* Next unread character in the input buffer */
69 size_t nIn; /* Number of characters in the input buffer */
70 char *zIn; /* The input buffer */
drh724b1892016-05-31 16:22:48 +000071 char zErr[CSV_MXERR]; /* Error message */
72};
73
74/* Initialize a CsvReader object */
75static void csv_reader_init(CsvReader *p){
drhadcba642016-06-02 17:44:24 +000076 p->in = 0;
77 p->z = 0;
78 p->n = 0;
79 p->nAlloc = 0;
80 p->nLine = 0;
81 p->nIn = 0;
82 p->zIn = 0;
83 p->zErr[0] = 0;
drh724b1892016-05-31 16:22:48 +000084}
85
86/* Close and reset a CsvReader object */
87static void csv_reader_reset(CsvReader *p){
drhadcba642016-06-02 17:44:24 +000088 if( p->in ){
89 fclose(p->in);
90 sqlite3_free(p->zIn);
91 }
drh724b1892016-05-31 16:22:48 +000092 sqlite3_free(p->z);
93 csv_reader_init(p);
94}
95
96/* Report an error on a CsvReader */
97static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
98 va_list ap;
99 va_start(ap, zFormat);
100 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
101 va_end(ap);
102}
103
104/* Open the file associated with a CsvReader
105** Return the number of errors.
106*/
drhadcba642016-06-02 17:44:24 +0000107static int csv_reader_open(
108 CsvReader *p, /* The reader to open */
109 const char *zFilename, /* Read from this filename */
110 const char *zData /* ... or use this data */
111){
112 if( zFilename ){
113 p->zIn = sqlite3_malloc( CSV_INBUFSZ );
114 if( p->zIn==0 ){
115 csv_errmsg(p, "out of memory");
116 return 1;
117 }
118 p->in = fopen(zFilename, "rb");
119 if( p->in==0 ){
120 csv_reader_reset(p);
121 csv_errmsg(p, "cannot open '%s' for reading", zFilename);
122 return 1;
123 }
124 }else{
125 assert( p->in==0 );
126 p->zIn = (char*)zData;
127 p->nIn = strlen(zData);
drh724b1892016-05-31 16:22:48 +0000128 }
129 return 0;
130}
131
drhadcba642016-06-02 17:44:24 +0000132/* The input buffer has overflowed. Refill the input buffer, then
133** return the next character
134*/
135static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
136 size_t got;
137
138 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
139 assert( p->in!=0 ); /* Only called if reading froma file */
140
141 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
142 if( got==0 ) return EOF;
143 p->nIn = got;
144 p->iIn = 1;
145 return p->zIn[0];
146}
147
148/* Return the next character of input. Return EOF at end of input. */
149static int csv_getc(CsvReader *p){
150 if( p->iIn >= p->nIn ){
151 if( p->in!=0 ) return csv_getc_refill(p);
152 return EOF;
153 }
154 return p->zIn[p->iIn++];
155}
156
drh724b1892016-05-31 16:22:48 +0000157/* Increase the size of p->z and append character c to the end.
158** Return 0 on success and non-zero if there is an OOM error */
159static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
160 char *zNew;
161 int nNew = p->nAlloc*2 + 100;
162 zNew = sqlite3_realloc64(p->z, nNew);
163 if( zNew ){
164 p->z = zNew;
165 p->nAlloc = nNew;
166 p->z[p->n++] = c;
167 return 0;
168 }else{
169 csv_errmsg(p, "out of memory");
170 return 1;
171 }
172}
173
174/* Append a single character to the CsvReader.z[] array.
175** Return 0 on success and non-zero if there is an OOM error */
176static int csv_append(CsvReader *p, char c){
177 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
178 p->z[p->n++] = c;
179 return 0;
180}
181
182/* Read a single field of CSV text. Compatible with rfc4180 and extended
183** with the option of having a separator other than ",".
184**
185** + Input comes from p->in.
186** + Store results in p->z of length p->n. Space to hold p->z comes
187** from sqlite3_malloc64().
188** + Keep track of the line number in p->nLine.
189** + Store the character that terminates the field in p->cTerm. Store
190** EOF on end-of-file.
191**
192** Return "" at EOF. Return 0 on an OOM error.
193*/
194static char *csv_read_one_field(CsvReader *p){
195 int c;
196 p->n = 0;
drhadcba642016-06-02 17:44:24 +0000197 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000198 if( c==EOF ){
199 p->cTerm = EOF;
200 return "";
201 }
202 if( c=='"' ){
203 int pc, ppc;
204 int startLine = p->nLine;
205 int cQuote = c;
206 pc = ppc = 0;
207 while( 1 ){
drhadcba642016-06-02 17:44:24 +0000208 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000209 if( c=='\n' ) p->nLine++;
210 if( c==cQuote ){
211 if( pc==cQuote ){
212 pc = 0;
213 continue;
214 }
215 }
216 if( (c==',' && pc==cQuote)
217 || (c=='\n' && pc==cQuote)
218 || (c=='\n' && pc=='\r' && ppc==cQuote)
219 || (c==EOF && pc==cQuote)
220 ){
221 do{ p->n--; }while( p->z[p->n]!=cQuote );
222 p->cTerm = c;
223 break;
224 }
225 if( pc==cQuote && c!='\r' ){
226 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote);
227 break;
228 }
229 if( c==EOF ){
230 csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
231 startLine, cQuote);
232 p->cTerm = c;
233 break;
234 }
235 if( csv_append(p, (char)c) ) return 0;
236 ppc = pc;
237 pc = c;
238 }
239 }else{
240 while( c!=EOF && c!=',' && c!='\n' ){
241 if( csv_append(p, (char)c) ) return 0;
drhadcba642016-06-02 17:44:24 +0000242 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000243 }
244 if( c=='\n' ){
245 p->nLine++;
246 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
247 }
248 p->cTerm = c;
249 }
250 if( p->z ) p->z[p->n] = 0;
251 return p->z;
252}
253
254
255/* Forward references to the various virtual table methods implemented
256** in this file. */
257static int csvtabCreate(sqlite3*, void*, int, const char*const*,
258 sqlite3_vtab**,char**);
259static int csvtabConnect(sqlite3*, void*, int, const char*const*,
260 sqlite3_vtab**,char**);
261static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
262static int csvtabDisconnect(sqlite3_vtab*);
263static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
264static int csvtabClose(sqlite3_vtab_cursor*);
265static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
266 int argc, sqlite3_value **argv);
267static int csvtabNext(sqlite3_vtab_cursor*);
268static int csvtabEof(sqlite3_vtab_cursor*);
269static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
270static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
271
272/* An instance of the CSV virtual table */
273typedef struct CsvTable {
274 sqlite3_vtab base; /* Base class. Must be first */
275 char *zFilename; /* Name of the CSV file */
drhadcba642016-06-02 17:44:24 +0000276 char *zData; /* Raw CSV data in lieu of zFilename */
drh724b1892016-05-31 16:22:48 +0000277 long iStart; /* Offset to start of data in zFilename */
278 int nCol; /* Number of columns in the CSV file */
drhabfd2722016-05-31 18:08:35 +0000279 unsigned int tstFlags; /* Bit values used for testing */
drh724b1892016-05-31 16:22:48 +0000280} CsvTable;
281
drhabfd2722016-05-31 18:08:35 +0000282/* Allowed values for tstFlags */
283#define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
284
drh724b1892016-05-31 16:22:48 +0000285/* A cursor for the CSV virtual table */
286typedef struct CsvCursor {
287 sqlite3_vtab_cursor base; /* Base class. Must be first */
288 CsvReader rdr; /* The CsvReader object */
289 char **azVal; /* Value of the current row */
290 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
291} CsvCursor;
292
293/* Transfer error message text from a reader into a CsvTable */
294static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
295 sqlite3_free(pTab->base.zErrMsg);
296 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
297}
298
299/*
300** This method is the destructor fo a CsvTable object.
301*/
302static int csvtabDisconnect(sqlite3_vtab *pVtab){
303 CsvTable *p = (CsvTable*)pVtab;
304 sqlite3_free(p->zFilename);
drh35db31b2016-06-02 23:13:21 +0000305 sqlite3_free(p->zData);
drh724b1892016-05-31 16:22:48 +0000306 sqlite3_free(p);
307 return SQLITE_OK;
308}
309
/* Step over any whitespace at the start of z.  The result points at the
** first character that is not whitespace, which is the zero terminator
** when the string holds nothing but whitespace. */
static const char *csv_skip_whitespace(const char *z){
  const char *zCur;
  for(zCur=z; isspace((unsigned char)*zCur); zCur++){}
  return zCur;
}

/* Remove trailing whitespace from the end of string z[].  The string is
** modified in place by writing a new zero terminator after the last
** character that is not whitespace. */
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  /* z[n] is the terminator, so the last real character is z[n-1] */
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}

/* Remove quoting from string z[], in place.
**
** Nothing happens unless z begins with ' or " and ends with that same
** character.  Otherwise the enclosing quotes are removed and each
** doubled quote character inside is reduced to a single one. */
static void csv_dequote(char *z){
  char cQuote = z[0];
  size_t nByte;          /* Length of the quoted string */
  size_t iIn;            /* Next character to read */
  size_t iOut;           /* Next character to write */

  if( cQuote!='\'' && cQuote!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=cQuote ) return;
  iOut = 0;
  iIn = 1;
  while( iIn<nByte-1 ){
    if( z[iIn]==cQuote && z[iIn+1]==cQuote ) iIn++;
    z[iOut++] = z[iIn++];
  }
  z[iOut] = 0;
}

/* Test whether z has the form "TAG = VALUE", where TAG is the first nTag
** bytes of zTag and whitespace is allowed before TAG and on either side
** of the "=".  Return a pointer to the first character of VALUE if so,
** or NULL if z does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ) return csv_skip_whitespace(zCur+1);
  }
  return 0;
}

drhadcba642016-06-02 17:44:24 +0000352/* Decode a parameter that requires a dequoted string.
353**
354** Return 1 if the parameter is seen, or 0 if not. 1 is returned
355** even if there is an error. If an error occurs, then an error message
356** is left in p->zErr. If there are no errors, p->zErr[0]==0.
357*/
358static int csv_string_parameter(
359 CsvReader *p, /* Leave the error message here, if there is one */
360 const char *zParam, /* Parameter we are checking for */
361 const char *zArg, /* Raw text of the virtual table argment */
362 char **pzVal /* Write the dequoted string value here */
363){
364 const char *zValue;
365 zValue = csv_parameter(zParam,strlen(zParam),zArg);
366 if( zValue==0 ) return 0;
367 p->zErr[0] = 0;
368 if( *pzVal ){
369 csv_errmsg(p, "more than one '%s' parameter", zParam);
370 return 1;
371 }
372 *pzVal = sqlite3_mprintf("%s", zValue);
373 if( *pzVal==0 ){
374 csv_errmsg(p, "out of memory");
375 return 1;
376 }
377 csv_trim_whitespace(*pzVal);
378 csv_dequote(*pzVal);
379 return 1;
380}
381
382
/* Interpret string z as a boolean.
**
** Return 1 for "yes", "on", "true" (compared without regard to case) or
** "1".  Return 0 for "no", "off", "false" or "0".  Return -1 if
** we cannot really tell.
*/
static int csv_boolean(const char *z){
  if( sqlite3_stricmp("yes",z)==0
   || sqlite3_stricmp("on",z)==0
   || sqlite3_stricmp("true",z)==0
   || (z[0]=='1' && z[1]==0)
  ){
    return 1;
  }
  if( sqlite3_stricmp("no",z)==0
   || sqlite3_stricmp("off",z)==0
   || sqlite3_stricmp("false",z)==0
   || (z[0]=='0' && z[1]==0)
  ){
    return 0;
  }
  return -1;
}


405/*
406** Parameters:
drhadcba642016-06-02 17:44:24 +0000407** filename=FILENAME Name of file containing CSV content
408** data=TEXT Direct CSV content.
drh1fc1a0f2016-05-31 18:44:33 +0000409** schema=SCHEMA Alternative CSV schema.
drh724b1892016-05-31 16:22:48 +0000410** header=YES|NO First row of CSV defines the names of
411** columns if "yes". Default "no".
drhadcba642016-06-02 17:44:24 +0000412** columns=N Assume the CSV file contains N columns.
drhabfd2722016-05-31 18:08:35 +0000413** testflags=N Bitmask of test flags. Optional
drh724b1892016-05-31 16:22:48 +0000414**
drh1fc1a0f2016-05-31 18:44:33 +0000415** If schema= is omitted, then the columns are named "c0", "c1", "c2",
416** and so forth. If columns=N is omitted, then the file is opened and
417** the number of columns in the first row is counted to determine the
418** column count. If header=YES, then the first row is skipped.
drh724b1892016-05-31 16:22:48 +0000419*/
420static int csvtabConnect(
421 sqlite3 *db,
422 void *pAux,
423 int argc, const char *const*argv,
424 sqlite3_vtab **ppVtab,
425 char **pzErr
426){
drh1fc1a0f2016-05-31 18:44:33 +0000427 CsvTable *pNew = 0; /* The CsvTable object to construct */
428 int bHeader = -1; /* header= flags. -1 means not seen yet */
429 int rc = SQLITE_OK; /* Result code from this routine */
drhadcba642016-06-02 17:44:24 +0000430 int i, j; /* Loop counters */
431 int tstFlags = 0; /* Value for testflags=N parameter */
drh1fc1a0f2016-05-31 18:44:33 +0000432 int nCol = -99; /* Value of the columns= parameter */
433 CsvReader sRdr; /* A CSV file reader used to store an error
434 ** message and/or to count the number of columns */
drhadcba642016-06-02 17:44:24 +0000435 static const char *azParam[] = {
436 "filename", "data", "schema",
437 };
438 char *azPValue[3]; /* Parameter values */
439# define CSV_FILENAME (azPValue[0])
440# define CSV_DATA (azPValue[1])
441# define CSV_SCHEMA (azPValue[2])
drh724b1892016-05-31 16:22:48 +0000442
drhadcba642016-06-02 17:44:24 +0000443
444 assert( sizeof(azPValue)==sizeof(azParam) );
drh724b1892016-05-31 16:22:48 +0000445 memset(&sRdr, 0, sizeof(sRdr));
drhadcba642016-06-02 17:44:24 +0000446 memset(azPValue, 0, sizeof(azPValue));
drh724b1892016-05-31 16:22:48 +0000447 for(i=3; i<argc; i++){
448 const char *z = argv[i];
449 const char *zValue;
drhadcba642016-06-02 17:44:24 +0000450 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
451 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
452 }
453 if( j<sizeof(azParam)/sizeof(azParam[0]) ){
454 if( sRdr.zErr[0] ) goto csvtab_connect_error;
drh724b1892016-05-31 16:22:48 +0000455 }else
456 if( (zValue = csv_parameter("header",6,z))!=0 ){
457 int x;
458 if( bHeader>=0 ){
459 csv_errmsg(&sRdr, "more than one 'header' parameter");
460 goto csvtab_connect_error;
461 }
462 x = csv_boolean(zValue);
463 if( x==1 ){
464 bHeader = 1;
465 }else if( x==0 ){
466 bHeader = 0;
467 }else{
468 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
469 goto csvtab_connect_error;
470 }
471 }else
drhabfd2722016-05-31 18:08:35 +0000472 if( (zValue = csv_parameter("testflags",9,z))!=0 ){
473 tstFlags = (unsigned int)atoi(zValue);
474 }else
drh1fc1a0f2016-05-31 18:44:33 +0000475 if( (zValue = csv_parameter("columns",7,z))!=0 ){
476 if( nCol>0 ){
477 csv_errmsg(&sRdr, "more than one 'columns' parameter");
478 goto csvtab_connect_error;
479 }
480 nCol = atoi(zValue);
481 if( nCol<=0 ){
482 csv_errmsg(&sRdr, "must have at least one column");
483 goto csvtab_connect_error;
484 }
485 }else
drh724b1892016-05-31 16:22:48 +0000486 {
487 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
488 goto csvtab_connect_error;
489 }
490 }
drhadcba642016-06-02 17:44:24 +0000491 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
492 csv_errmsg(&sRdr, "must either filename= or data= but not both");
drh724b1892016-05-31 16:22:48 +0000493 goto csvtab_connect_error;
494 }
drhadcba642016-06-02 17:44:24 +0000495 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
drh724b1892016-05-31 16:22:48 +0000496 goto csvtab_connect_error;
497 }
498 pNew = sqlite3_malloc( sizeof(*pNew) );
499 *ppVtab = (sqlite3_vtab*)pNew;
500 if( pNew==0 ) goto csvtab_connect_oom;
501 memset(pNew, 0, sizeof(*pNew));
drh1fc1a0f2016-05-31 18:44:33 +0000502 if( nCol>0 ){
503 pNew->nCol = nCol;
504 }else{
505 do{
506 const char *z = csv_read_one_field(&sRdr);
507 if( z==0 ) goto csvtab_connect_oom;
508 pNew->nCol++;
509 }while( sRdr.cTerm==',' );
510 }
drhadcba642016-06-02 17:44:24 +0000511 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
512 pNew->zData = CSV_DATA; CSV_DATA = 0;
drhabfd2722016-05-31 18:08:35 +0000513 pNew->tstFlags = tstFlags;
drh724b1892016-05-31 16:22:48 +0000514 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
515 csv_reader_reset(&sRdr);
drhadcba642016-06-02 17:44:24 +0000516 if( CSV_SCHEMA==0 ){
drh724b1892016-05-31 16:22:48 +0000517 char *zSep = "";
drhadcba642016-06-02 17:44:24 +0000518 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
519 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
drh724b1892016-05-31 16:22:48 +0000520 for(i=0; i<pNew->nCol; i++){
drhadcba642016-06-02 17:44:24 +0000521 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
drh724b1892016-05-31 16:22:48 +0000522 zSep = ",";
523 }
drhadcba642016-06-02 17:44:24 +0000524 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000525 }
drhadcba642016-06-02 17:44:24 +0000526 rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000527 if( rc ) goto csvtab_connect_error;
drhadcba642016-06-02 17:44:24 +0000528 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
529 sqlite3_free(azPValue[i]);
530 }
drh724b1892016-05-31 16:22:48 +0000531 return SQLITE_OK;
532
533csvtab_connect_oom:
534 rc = SQLITE_NOMEM;
535 csv_errmsg(&sRdr, "out of memory");
536
537csvtab_connect_error:
538 if( pNew ) csvtabDisconnect(&pNew->base);
drhadcba642016-06-02 17:44:24 +0000539 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
540 sqlite3_free(azPValue[i]);
541 }
drh724b1892016-05-31 16:22:48 +0000542 if( sRdr.zErr[0] ){
543 sqlite3_free(*pzErr);
544 *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
545 }
546 csv_reader_reset(&sRdr);
drhabfd2722016-05-31 18:08:35 +0000547 if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
drh724b1892016-05-31 16:22:48 +0000548 return rc;
549}
550
551/*
552** Reset the current row content held by a CsvCursor.
553*/
554static void csvtabCursorRowReset(CsvCursor *pCur){
555 CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
556 int i;
557 for(i=0; i<pTab->nCol; i++){
558 sqlite3_free(pCur->azVal[i]);
559 pCur->azVal[i] = 0;
560 }
561}
562
563/*
564** The xConnect and xCreate methods do the same thing, but they must be
565** different so that the virtual table is not an eponymous virtual table.
566*/
567static int csvtabCreate(
568 sqlite3 *db,
569 void *pAux,
570 int argc, const char *const*argv,
571 sqlite3_vtab **ppVtab,
572 char **pzErr
573){
574 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
575}
576
577/*
578** Destructor for a CsvCursor.
579*/
580static int csvtabClose(sqlite3_vtab_cursor *cur){
581 CsvCursor *pCur = (CsvCursor*)cur;
582 csvtabCursorRowReset(pCur);
583 csv_reader_reset(&pCur->rdr);
584 sqlite3_free(cur);
585 return SQLITE_OK;
586}
587
588/*
589** Constructor for a new CsvTable cursor object.
590*/
591static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
592 CsvTable *pTab = (CsvTable*)p;
593 CsvCursor *pCur;
594 pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol );
595 if( pCur==0 ) return SQLITE_NOMEM;
596 memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol );
597 pCur->azVal = (char**)&pCur[1];
598 *ppCursor = &pCur->base;
drhadcba642016-06-02 17:44:24 +0000599 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
drh724b1892016-05-31 16:22:48 +0000600 csv_xfer_error(pTab, &pCur->rdr);
601 return SQLITE_ERROR;
602 }
603 return SQLITE_OK;
604}
605
606
607/*
608** Advance a CsvCursor to its next row of input.
609** Set the EOF marker if we reach the end of input.
610*/
611static int csvtabNext(sqlite3_vtab_cursor *cur){
612 CsvCursor *pCur = (CsvCursor*)cur;
613 CsvTable *pTab = (CsvTable*)cur->pVtab;
614 int i = 0;
615 char *z;
616 csvtabCursorRowReset(pCur);
617 do{
618 z = csv_read_one_field(&pCur->rdr);
619 if( z==0 ){
620 csv_xfer_error(pTab, &pCur->rdr);
621 break;
622 }
623 z = sqlite3_mprintf("%s", z);
624 if( z==0 ){
625 csv_errmsg(&pCur->rdr, "out of memory");
626 csv_xfer_error(pTab, &pCur->rdr);
627 break;
628 }
629 if( i<pTab->nCol ){
630 pCur->azVal[i++] = z;
631 }
632 }while( z!=0 && pCur->rdr.cTerm==',' );
633 if( z==0 || pCur->rdr.cTerm==EOF ){
634 pCur->iRowid = -1;
635 }else{
636 pCur->iRowid++;
637 }
638 return SQLITE_OK;
639}
640
641/*
642** Return values of columns for the row at which the CsvCursor
643** is currently pointing.
644*/
645static int csvtabColumn(
646 sqlite3_vtab_cursor *cur, /* The cursor */
647 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
648 int i /* Which column to return */
649){
650 CsvCursor *pCur = (CsvCursor*)cur;
651 CsvTable *pTab = (CsvTable*)cur->pVtab;
652 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
653 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
654 }
655 return SQLITE_OK;
656}
657
658/*
659** Return the rowid for the current row.
660*/
661static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
662 CsvCursor *pCur = (CsvCursor*)cur;
663 *pRowid = pCur->iRowid;
664 return SQLITE_OK;
665}
666
667/*
668** Return TRUE if the cursor has been moved off of the last
669** row of output.
670*/
671static int csvtabEof(sqlite3_vtab_cursor *cur){
672 CsvCursor *pCur = (CsvCursor*)cur;
673 return pCur->iRowid<0;
674}
675
676/*
677** Only a full table scan is supported. So xFilter simply rewinds to
678** the beginning.
679*/
680static int csvtabFilter(
681 sqlite3_vtab_cursor *pVtabCursor,
682 int idxNum, const char *idxStr,
683 int argc, sqlite3_value **argv
684){
685 CsvCursor *pCur = (CsvCursor*)pVtabCursor;
686 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
687 pCur->iRowid = 0;
drhadcba642016-06-02 17:44:24 +0000688 if( pCur->rdr.in==0 ){
689 assert( pCur->rdr.zIn==pTab->zData );
690 assert( pTab->iStart<=pCur->rdr.nIn );
691 pCur->rdr.iIn = pTab->iStart;
692 }else{
693 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
694 pCur->rdr.iIn = 0;
695 pCur->rdr.nIn = 0;
696 }
drh724b1892016-05-31 16:22:48 +0000697 return csvtabNext(pVtabCursor);
698}
699
700/*
drhadcba642016-06-02 17:44:24 +0000701** Only a forward full table scan is supported. xBestIndex is mostly
drhabfd2722016-05-31 18:08:35 +0000702** a no-op. If CSVTEST_FIDX is set, then the presence of equality
703** constraints lowers the estimated cost, which is fiction, but is useful
704** for testing certain kinds of virtual table behavior.
drh724b1892016-05-31 16:22:48 +0000705*/
706static int csvtabBestIndex(
707 sqlite3_vtab *tab,
708 sqlite3_index_info *pIdxInfo
709){
drhabfd2722016-05-31 18:08:35 +0000710 CsvTable *pTab = (CsvTable*)tab;
711 int i;
drh1fc1a0f2016-05-31 18:44:33 +0000712 int nConst = 0;
drhabfd2722016-05-31 18:08:35 +0000713 pIdxInfo->estimatedCost = 1000000;
714 if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){
715 return SQLITE_OK;
716 }
drhadcba642016-06-02 17:44:24 +0000717 /* The usual (and sensible) case is to take the "return SQLITE_OK" above.
718 ** The code below only runs when testflags=1. The code below
drh1fc1a0f2016-05-31 18:44:33 +0000719 ** generates an artifical and unrealistic plan which is useful
drhadcba642016-06-02 17:44:24 +0000720 ** for testing virtual table logic but is not helpfulto real applications.
721 **
722 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
723 ** table (even though it is not) and the cost of running the virtual table
724 ** is reduced from 1 million to just 10. The constraints are *not* marked
725 ** as omittable, however, so the query planner should still generate a
726 ** plan that gives a correct answer, even if they plan is not optimal.
727 */
drhabfd2722016-05-31 18:08:35 +0000728 for(i=0; i<pIdxInfo->nConstraint; i++){
drh1fc1a0f2016-05-31 18:44:33 +0000729 unsigned char op;
drhabfd2722016-05-31 18:08:35 +0000730 if( pIdxInfo->aConstraint[i].usable==0 ) continue;
drh1fc1a0f2016-05-31 18:44:33 +0000731 op = pIdxInfo->aConstraint[i].op;
732 if( op==SQLITE_INDEX_CONSTRAINT_EQ
733 || op==SQLITE_INDEX_CONSTRAINT_LIKE
734 || op==SQLITE_INDEX_CONSTRAINT_GLOB
735 ){
drhabfd2722016-05-31 18:08:35 +0000736 pIdxInfo->estimatedCost = 10;
drh1fc1a0f2016-05-31 18:44:33 +0000737 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
738 nConst++;
drhabfd2722016-05-31 18:08:35 +0000739 }
740 }
drh724b1892016-05-31 16:22:48 +0000741 return SQLITE_OK;
742}
743
744
745static sqlite3_module CsvModule = {
746 0, /* iVersion */
747 csvtabCreate, /* xCreate */
748 csvtabConnect, /* xConnect */
749 csvtabBestIndex, /* xBestIndex */
750 csvtabDisconnect, /* xDisconnect */
751 csvtabDisconnect, /* xDestroy */
752 csvtabOpen, /* xOpen - open a cursor */
753 csvtabClose, /* xClose - close a cursor */
754 csvtabFilter, /* xFilter - configure scan constraints */
755 csvtabNext, /* xNext - advance a cursor */
756 csvtabEof, /* xEof - check for end of scan */
757 csvtabColumn, /* xColumn - read data */
758 csvtabRowid, /* xRowid - read data */
759 0, /* xUpdate */
760 0, /* xBegin */
761 0, /* xSync */
762 0, /* xCommit */
763 0, /* xRollback */
764 0, /* xFindMethod */
765 0, /* xRename */
766};
767
768#ifdef _WIN32
769__declspec(dllexport)
770#endif
771/*
772** This routine is called when the extension is loaded. The new
773** CSV virtual table module is registered with the calling database
774** connection.
775*/
776int sqlite3_csv_init(
777 sqlite3 *db,
778 char **pzErrMsg,
779 const sqlite3_api_routines *pApi
780){
781 SQLITE_EXTENSION_INIT2(pApi);
782 return sqlite3_create_module(db, "csv", &CsvModule, 0);
783}