blob: 1eafc3a41409d4d99c039cf0ec81922cc1bea965 [file] [log] [blame]
drh724b1892016-05-31 16:22:48 +00001/*
2** 2016-05-28
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains the implementation of an SQLite virtual table for
14** reading CSV files.
15**
16** Usage:
17**
18** .load ./csv
19** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20** SELECT * FROM csv;
21**
22** The columns are named "c0", "c1", "c2", ... by default. But the
23** application can define its own CREATE TABLE statement as an additional
24** parameter. For example:
25**
26** CREATE VIRTUAL TABLE temp.csv2 USING csv(
27** filename = "../http.log",
28** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29** );
drhac9c3d22016-06-03 01:01:57 +000030**
31** Instead of specifying a file, the text of the CSV can be loaded using
32** the data= parameter.
33**
34** If the columns=N parameter is supplied, then the CSV file is assumed to have
35** N columns. If the columns parameter is omitted, the CSV file is opened
36** as soon as the virtual table is constructed and the first row of the CSV
37** is read in order to count the columns.
38**
39** Some extra debugging features (used for testing virtual tables) are available
40** if this module is compiled with -DSQLITE_TEST.
drh724b1892016-05-31 16:22:48 +000041*/
42#include <sqlite3ext.h>
43SQLITE_EXTENSION_INIT1
44#include <string.h>
45#include <stdlib.h>
46#include <assert.h>
47#include <stdarg.h>
48#include <ctype.h>
49#include <stdio.h>
50
drheb5a5492016-07-15 02:50:18 +000051#ifndef SQLITE_OMIT_VIRTUALTABLE
52
/*
** CSV_NOINLINE marks a function that the compiler should keep out of
** line.  It expands to the GCC-style attribute or to the MSVC declspec
** when one of those compilers is detected, and to nothing otherwise.
*/
#if defined(__GNUC__)
#  define CSV_NOINLINE  __attribute__((noinline))
#elif defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
#else
#  define CSV_NOINLINE
#endif
64
65
/* Capacity, in bytes, of the CsvReader.zErr[] error message buffer */
#define CSV_MXERR 200

/* Capacity, in bytes, of the buffer used when reading CSV text from a file */
#define CSV_INBUFSZ 1024
71
drh724b1892016-05-31 16:22:48 +000072/* A context object used when read a CSV file. */
73typedef struct CsvReader CsvReader;
74struct CsvReader {
75 FILE *in; /* Read the CSV text from this input stream */
76 char *z; /* Accumulated text for a field */
77 int n; /* Number of bytes in z */
78 int nAlloc; /* Space allocated for z[] */
79 int nLine; /* Current line number */
drhd5fbde82017-06-26 18:42:23 +000080 int bNotFirst; /* True if prior text has been seen */
drhadcba642016-06-02 17:44:24 +000081 char cTerm; /* Character that terminated the most recent field */
82 size_t iIn; /* Next unread character in the input buffer */
83 size_t nIn; /* Number of characters in the input buffer */
84 char *zIn; /* The input buffer */
drh724b1892016-05-31 16:22:48 +000085 char zErr[CSV_MXERR]; /* Error message */
86};
87
88/* Initialize a CsvReader object */
89static void csv_reader_init(CsvReader *p){
drhadcba642016-06-02 17:44:24 +000090 p->in = 0;
91 p->z = 0;
92 p->n = 0;
93 p->nAlloc = 0;
94 p->nLine = 0;
drhd5fbde82017-06-26 18:42:23 +000095 p->bNotFirst = 0;
drhadcba642016-06-02 17:44:24 +000096 p->nIn = 0;
97 p->zIn = 0;
98 p->zErr[0] = 0;
drh724b1892016-05-31 16:22:48 +000099}
100
101/* Close and reset a CsvReader object */
102static void csv_reader_reset(CsvReader *p){
drhadcba642016-06-02 17:44:24 +0000103 if( p->in ){
104 fclose(p->in);
105 sqlite3_free(p->zIn);
106 }
drh724b1892016-05-31 16:22:48 +0000107 sqlite3_free(p->z);
108 csv_reader_init(p);
109}
110
111/* Report an error on a CsvReader */
112static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
113 va_list ap;
114 va_start(ap, zFormat);
115 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
116 va_end(ap);
117}
118
119/* Open the file associated with a CsvReader
120** Return the number of errors.
121*/
drhadcba642016-06-02 17:44:24 +0000122static int csv_reader_open(
123 CsvReader *p, /* The reader to open */
124 const char *zFilename, /* Read from this filename */
125 const char *zData /* ... or use this data */
126){
127 if( zFilename ){
128 p->zIn = sqlite3_malloc( CSV_INBUFSZ );
129 if( p->zIn==0 ){
130 csv_errmsg(p, "out of memory");
131 return 1;
132 }
133 p->in = fopen(zFilename, "rb");
134 if( p->in==0 ){
135 csv_reader_reset(p);
136 csv_errmsg(p, "cannot open '%s' for reading", zFilename);
137 return 1;
138 }
139 }else{
140 assert( p->in==0 );
141 p->zIn = (char*)zData;
142 p->nIn = strlen(zData);
drh724b1892016-05-31 16:22:48 +0000143 }
144 return 0;
145}
146
drhadcba642016-06-02 17:44:24 +0000147/* The input buffer has overflowed. Refill the input buffer, then
148** return the next character
149*/
150static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
151 size_t got;
152
153 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
154 assert( p->in!=0 ); /* Only called if reading froma file */
155
156 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
157 if( got==0 ) return EOF;
158 p->nIn = got;
159 p->iIn = 1;
160 return p->zIn[0];
161}
162
163/* Return the next character of input. Return EOF at end of input. */
164static int csv_getc(CsvReader *p){
165 if( p->iIn >= p->nIn ){
166 if( p->in!=0 ) return csv_getc_refill(p);
167 return EOF;
168 }
169 return p->zIn[p->iIn++];
170}
171
drh724b1892016-05-31 16:22:48 +0000172/* Increase the size of p->z and append character c to the end.
173** Return 0 on success and non-zero if there is an OOM error */
174static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
175 char *zNew;
176 int nNew = p->nAlloc*2 + 100;
177 zNew = sqlite3_realloc64(p->z, nNew);
178 if( zNew ){
179 p->z = zNew;
180 p->nAlloc = nNew;
181 p->z[p->n++] = c;
182 return 0;
183 }else{
184 csv_errmsg(p, "out of memory");
185 return 1;
186 }
187}
188
189/* Append a single character to the CsvReader.z[] array.
190** Return 0 on success and non-zero if there is an OOM error */
191static int csv_append(CsvReader *p, char c){
192 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
193 p->z[p->n++] = c;
194 return 0;
195}
196
197/* Read a single field of CSV text. Compatible with rfc4180 and extended
198** with the option of having a separator other than ",".
199**
200** + Input comes from p->in.
201** + Store results in p->z of length p->n. Space to hold p->z comes
202** from sqlite3_malloc64().
203** + Keep track of the line number in p->nLine.
204** + Store the character that terminates the field in p->cTerm. Store
205** EOF on end-of-file.
206**
207** Return "" at EOF. Return 0 on an OOM error.
208*/
209static char *csv_read_one_field(CsvReader *p){
210 int c;
211 p->n = 0;
drhadcba642016-06-02 17:44:24 +0000212 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000213 if( c==EOF ){
214 p->cTerm = EOF;
215 return "";
216 }
217 if( c=='"' ){
218 int pc, ppc;
219 int startLine = p->nLine;
drh724b1892016-05-31 16:22:48 +0000220 pc = ppc = 0;
221 while( 1 ){
drhadcba642016-06-02 17:44:24 +0000222 c = csv_getc(p);
drhac9c3d22016-06-03 01:01:57 +0000223 if( c<='"' || pc=='"' ){
224 if( c=='\n' ) p->nLine++;
225 if( c=='"' ){
226 if( pc=='"' ){
227 pc = 0;
228 continue;
229 }
drh724b1892016-05-31 16:22:48 +0000230 }
drhac9c3d22016-06-03 01:01:57 +0000231 if( (c==',' && pc=='"')
232 || (c=='\n' && pc=='"')
233 || (c=='\n' && pc=='\r' && ppc=='"')
234 || (c==EOF && pc=='"')
235 ){
236 do{ p->n--; }while( p->z[p->n]!='"' );
mistachkin80f2b332016-07-22 21:26:56 +0000237 p->cTerm = (char)c;
drhac9c3d22016-06-03 01:01:57 +0000238 break;
239 }
240 if( pc=='"' && c!='\r' ){
241 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
242 break;
243 }
244 if( c==EOF ){
245 csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
246 startLine, '"');
mistachkin80f2b332016-07-22 21:26:56 +0000247 p->cTerm = (char)c;
drhac9c3d22016-06-03 01:01:57 +0000248 break;
249 }
drh724b1892016-05-31 16:22:48 +0000250 }
251 if( csv_append(p, (char)c) ) return 0;
252 ppc = pc;
253 pc = c;
254 }
255 }else{
drhd5fbde82017-06-26 18:42:23 +0000256 /* If this is the first field being parsed and it begins with the
257 ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
258 if( (c&0xff)==0xef && p->bNotFirst==0 ){
drh2fb960b2017-06-28 15:17:31 +0000259 csv_append(p, (char)c);
drhd5fbde82017-06-26 18:42:23 +0000260 c = csv_getc(p);
261 if( (c&0xff)==0xbb ){
drh2fb960b2017-06-28 15:17:31 +0000262 csv_append(p, (char)c);
drhd5fbde82017-06-26 18:42:23 +0000263 c = csv_getc(p);
264 if( (c&0xff)==0xbf ){
265 p->bNotFirst = 1;
266 p->n = 0;
267 return csv_read_one_field(p);
268 }
269 }
270 }
drhac9c3d22016-06-03 01:01:57 +0000271 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
drh724b1892016-05-31 16:22:48 +0000272 if( csv_append(p, (char)c) ) return 0;
drhadcba642016-06-02 17:44:24 +0000273 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000274 }
275 if( c=='\n' ){
276 p->nLine++;
277 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
278 }
mistachkin80f2b332016-07-22 21:26:56 +0000279 p->cTerm = (char)c;
drh724b1892016-05-31 16:22:48 +0000280 }
281 if( p->z ) p->z[p->n] = 0;
drhd5fbde82017-06-26 18:42:23 +0000282 p->bNotFirst = 1;
drh724b1892016-05-31 16:22:48 +0000283 return p->z;
284}
285
286
287/* Forward references to the various virtual table methods implemented
288** in this file. */
289static int csvtabCreate(sqlite3*, void*, int, const char*const*,
290 sqlite3_vtab**,char**);
291static int csvtabConnect(sqlite3*, void*, int, const char*const*,
292 sqlite3_vtab**,char**);
293static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
294static int csvtabDisconnect(sqlite3_vtab*);
295static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
296static int csvtabClose(sqlite3_vtab_cursor*);
297static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
298 int argc, sqlite3_value **argv);
299static int csvtabNext(sqlite3_vtab_cursor*);
300static int csvtabEof(sqlite3_vtab_cursor*);
301static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
302static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
303
304/* An instance of the CSV virtual table */
305typedef struct CsvTable {
306 sqlite3_vtab base; /* Base class. Must be first */
307 char *zFilename; /* Name of the CSV file */
drhadcba642016-06-02 17:44:24 +0000308 char *zData; /* Raw CSV data in lieu of zFilename */
drh724b1892016-05-31 16:22:48 +0000309 long iStart; /* Offset to start of data in zFilename */
310 int nCol; /* Number of columns in the CSV file */
drhabfd2722016-05-31 18:08:35 +0000311 unsigned int tstFlags; /* Bit values used for testing */
drh724b1892016-05-31 16:22:48 +0000312} CsvTable;
313
drhabfd2722016-05-31 18:08:35 +0000314/* Allowed values for tstFlags */
315#define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
316
drh724b1892016-05-31 16:22:48 +0000317/* A cursor for the CSV virtual table */
318typedef struct CsvCursor {
319 sqlite3_vtab_cursor base; /* Base class. Must be first */
320 CsvReader rdr; /* The CsvReader object */
321 char **azVal; /* Value of the current row */
drhac9c3d22016-06-03 01:01:57 +0000322 int *aLen; /* Length of each entry */
drh724b1892016-05-31 16:22:48 +0000323 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
324} CsvCursor;
325
326/* Transfer error message text from a reader into a CsvTable */
327static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
328 sqlite3_free(pTab->base.zErrMsg);
329 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
330}
331
332/*
333** This method is the destructor fo a CsvTable object.
334*/
335static int csvtabDisconnect(sqlite3_vtab *pVtab){
336 CsvTable *p = (CsvTable*)pVtab;
337 sqlite3_free(p->zFilename);
drh35db31b2016-06-02 23:13:21 +0000338 sqlite3_free(p->zData);
drh724b1892016-05-31 16:22:48 +0000339 sqlite3_free(p);
340 return SQLITE_OK;
341}
342
/* Advance past any whitespace at the start of z[].  The result points
** at the first non-whitespace character, which is the zero terminator
** when the string holds nothing but whitespace. */
static const char *csv_skip_whitespace(const char *z){
  for(; isspace((unsigned char)*z); z++){}
  return z;
}
349
/* Remove trailing whitespace from the end of string z[], in place.
**
** n is the length of the text kept so far, so the last kept character
** is z[n-1].  (Testing z[n] would examine the zero terminator and never
** trim anything.)
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
356
/* Remove quoting from z[] in place.
**
** Nothing happens unless z[] both begins and ends with the same quote
** character, either ' or ".  Otherwise the outer quotes are removed and
** each doubled quote character inside collapses to a single one.
*/
static void csv_dequote(char *z){
  char q = z[0];
  size_t nByte, iRd;
  int iWr = 0;

  if( q!='\'' && q!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=q ) return;
  iRd = 1;
  while( iRd<nByte-1 ){
    if( z[iRd]==q && z[iRd+1]==q ) iRd++;   /* Skip first of a pair */
    z[iWr++] = z[iRd++];
  }
  z[iWr] = 0;
}
372
/* Test whether z[] has the form "TAG = VALUE", where whitespace may
** appear before TAG and on either side of the "=".  zTag/nTag give the
** tag text and its length.  Return a pointer to the first character of
** VALUE on a match, or NULL if z[] does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur + nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur + 1);
    }
  }
  return 0;
}
384
drhadcba642016-06-02 17:44:24 +0000385/* Decode a parameter that requires a dequoted string.
386**
387** Return 1 if the parameter is seen, or 0 if not. 1 is returned
388** even if there is an error. If an error occurs, then an error message
389** is left in p->zErr. If there are no errors, p->zErr[0]==0.
390*/
391static int csv_string_parameter(
392 CsvReader *p, /* Leave the error message here, if there is one */
393 const char *zParam, /* Parameter we are checking for */
394 const char *zArg, /* Raw text of the virtual table argment */
395 char **pzVal /* Write the dequoted string value here */
396){
397 const char *zValue;
drh11499f02016-07-09 16:38:25 +0000398 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
drhadcba642016-06-02 17:44:24 +0000399 if( zValue==0 ) return 0;
400 p->zErr[0] = 0;
401 if( *pzVal ){
402 csv_errmsg(p, "more than one '%s' parameter", zParam);
403 return 1;
404 }
405 *pzVal = sqlite3_mprintf("%s", zValue);
406 if( *pzVal==0 ){
407 csv_errmsg(p, "out of memory");
408 return 1;
409 }
410 csv_trim_whitespace(*pzVal);
411 csv_dequote(*pzVal);
412 return 1;
413}
414
415
/* Interpret z[] as a boolean.  "yes", "on", "true" (in any case) and
** "1" give 1.  "no", "off", "false" (in any case) and "0" give 0.
** Anything else gives -1.
*/
static int csv_boolean(const char *z){
  static const char *const azTrue[] = { "yes", "on", "true" };
  static const char *const azFalse[] = { "no", "off", "false" };
  int k;

  for(k=0; k<3; k++){
    if( sqlite3_stricmp(azTrue[k], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;
  for(k=0; k<3; k++){
    if( sqlite3_stricmp(azFalse[k], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;
  return -1;
}
436
437
438/*
439** Parameters:
drhadcba642016-06-02 17:44:24 +0000440** filename=FILENAME Name of file containing CSV content
441** data=TEXT Direct CSV content.
drh1fc1a0f2016-05-31 18:44:33 +0000442** schema=SCHEMA Alternative CSV schema.
drh724b1892016-05-31 16:22:48 +0000443** header=YES|NO First row of CSV defines the names of
444** columns if "yes". Default "no".
drhadcba642016-06-02 17:44:24 +0000445** columns=N Assume the CSV file contains N columns.
drhac9c3d22016-06-03 01:01:57 +0000446**
447** Only available if compiled with SQLITE_TEST:
448**
drhabfd2722016-05-31 18:08:35 +0000449** testflags=N Bitmask of test flags. Optional
drh724b1892016-05-31 16:22:48 +0000450**
drh1fc1a0f2016-05-31 18:44:33 +0000451** If schema= is omitted, then the columns are named "c0", "c1", "c2",
452** and so forth. If columns=N is omitted, then the file is opened and
453** the number of columns in the first row is counted to determine the
454** column count. If header=YES, then the first row is skipped.
drh724b1892016-05-31 16:22:48 +0000455*/
456static int csvtabConnect(
457 sqlite3 *db,
458 void *pAux,
459 int argc, const char *const*argv,
460 sqlite3_vtab **ppVtab,
461 char **pzErr
462){
drh1fc1a0f2016-05-31 18:44:33 +0000463 CsvTable *pNew = 0; /* The CsvTable object to construct */
464 int bHeader = -1; /* header= flags. -1 means not seen yet */
465 int rc = SQLITE_OK; /* Result code from this routine */
drhadcba642016-06-02 17:44:24 +0000466 int i, j; /* Loop counters */
drhac9c3d22016-06-03 01:01:57 +0000467#ifdef SQLITE_TEST
drhadcba642016-06-02 17:44:24 +0000468 int tstFlags = 0; /* Value for testflags=N parameter */
drhac9c3d22016-06-03 01:01:57 +0000469#endif
drh1fc1a0f2016-05-31 18:44:33 +0000470 int nCol = -99; /* Value of the columns= parameter */
471 CsvReader sRdr; /* A CSV file reader used to store an error
472 ** message and/or to count the number of columns */
drhadcba642016-06-02 17:44:24 +0000473 static const char *azParam[] = {
474 "filename", "data", "schema",
475 };
476 char *azPValue[3]; /* Parameter values */
477# define CSV_FILENAME (azPValue[0])
478# define CSV_DATA (azPValue[1])
479# define CSV_SCHEMA (azPValue[2])
drh724b1892016-05-31 16:22:48 +0000480
drhadcba642016-06-02 17:44:24 +0000481
482 assert( sizeof(azPValue)==sizeof(azParam) );
drh724b1892016-05-31 16:22:48 +0000483 memset(&sRdr, 0, sizeof(sRdr));
drhadcba642016-06-02 17:44:24 +0000484 memset(azPValue, 0, sizeof(azPValue));
drh724b1892016-05-31 16:22:48 +0000485 for(i=3; i<argc; i++){
486 const char *z = argv[i];
487 const char *zValue;
drhadcba642016-06-02 17:44:24 +0000488 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
489 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
490 }
491 if( j<sizeof(azParam)/sizeof(azParam[0]) ){
492 if( sRdr.zErr[0] ) goto csvtab_connect_error;
drh724b1892016-05-31 16:22:48 +0000493 }else
494 if( (zValue = csv_parameter("header",6,z))!=0 ){
495 int x;
496 if( bHeader>=0 ){
497 csv_errmsg(&sRdr, "more than one 'header' parameter");
498 goto csvtab_connect_error;
499 }
500 x = csv_boolean(zValue);
501 if( x==1 ){
502 bHeader = 1;
503 }else if( x==0 ){
504 bHeader = 0;
505 }else{
506 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
507 goto csvtab_connect_error;
508 }
509 }else
drhac9c3d22016-06-03 01:01:57 +0000510#ifdef SQLITE_TEST
drhabfd2722016-05-31 18:08:35 +0000511 if( (zValue = csv_parameter("testflags",9,z))!=0 ){
512 tstFlags = (unsigned int)atoi(zValue);
513 }else
drhac9c3d22016-06-03 01:01:57 +0000514#endif
drh1fc1a0f2016-05-31 18:44:33 +0000515 if( (zValue = csv_parameter("columns",7,z))!=0 ){
516 if( nCol>0 ){
517 csv_errmsg(&sRdr, "more than one 'columns' parameter");
518 goto csvtab_connect_error;
519 }
520 nCol = atoi(zValue);
521 if( nCol<=0 ){
522 csv_errmsg(&sRdr, "must have at least one column");
523 goto csvtab_connect_error;
524 }
525 }else
drh724b1892016-05-31 16:22:48 +0000526 {
527 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
528 goto csvtab_connect_error;
529 }
530 }
drhadcba642016-06-02 17:44:24 +0000531 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
532 csv_errmsg(&sRdr, "must either filename= or data= but not both");
drh724b1892016-05-31 16:22:48 +0000533 goto csvtab_connect_error;
534 }
drhadcba642016-06-02 17:44:24 +0000535 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
drh724b1892016-05-31 16:22:48 +0000536 goto csvtab_connect_error;
537 }
538 pNew = sqlite3_malloc( sizeof(*pNew) );
539 *ppVtab = (sqlite3_vtab*)pNew;
540 if( pNew==0 ) goto csvtab_connect_oom;
541 memset(pNew, 0, sizeof(*pNew));
drh1fc1a0f2016-05-31 18:44:33 +0000542 if( nCol>0 ){
543 pNew->nCol = nCol;
544 }else{
545 do{
546 const char *z = csv_read_one_field(&sRdr);
547 if( z==0 ) goto csvtab_connect_oom;
548 pNew->nCol++;
549 }while( sRdr.cTerm==',' );
550 }
drhadcba642016-06-02 17:44:24 +0000551 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
552 pNew->zData = CSV_DATA; CSV_DATA = 0;
drhac9c3d22016-06-03 01:01:57 +0000553#ifdef SQLITE_TEST
drhabfd2722016-05-31 18:08:35 +0000554 pNew->tstFlags = tstFlags;
drhac9c3d22016-06-03 01:01:57 +0000555#endif
drh724b1892016-05-31 16:22:48 +0000556 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
557 csv_reader_reset(&sRdr);
drhadcba642016-06-02 17:44:24 +0000558 if( CSV_SCHEMA==0 ){
drh724b1892016-05-31 16:22:48 +0000559 char *zSep = "";
drhadcba642016-06-02 17:44:24 +0000560 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
561 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
drh724b1892016-05-31 16:22:48 +0000562 for(i=0; i<pNew->nCol; i++){
drhadcba642016-06-02 17:44:24 +0000563 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
drh724b1892016-05-31 16:22:48 +0000564 zSep = ",";
565 }
drhadcba642016-06-02 17:44:24 +0000566 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000567 }
drhadcba642016-06-02 17:44:24 +0000568 rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000569 if( rc ) goto csvtab_connect_error;
drhadcba642016-06-02 17:44:24 +0000570 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
571 sqlite3_free(azPValue[i]);
572 }
drh724b1892016-05-31 16:22:48 +0000573 return SQLITE_OK;
574
575csvtab_connect_oom:
576 rc = SQLITE_NOMEM;
577 csv_errmsg(&sRdr, "out of memory");
578
579csvtab_connect_error:
580 if( pNew ) csvtabDisconnect(&pNew->base);
drhadcba642016-06-02 17:44:24 +0000581 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
582 sqlite3_free(azPValue[i]);
583 }
drh724b1892016-05-31 16:22:48 +0000584 if( sRdr.zErr[0] ){
585 sqlite3_free(*pzErr);
586 *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
587 }
588 csv_reader_reset(&sRdr);
drhabfd2722016-05-31 18:08:35 +0000589 if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
drh724b1892016-05-31 16:22:48 +0000590 return rc;
591}
592
593/*
594** Reset the current row content held by a CsvCursor.
595*/
596static void csvtabCursorRowReset(CsvCursor *pCur){
597 CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
598 int i;
599 for(i=0; i<pTab->nCol; i++){
600 sqlite3_free(pCur->azVal[i]);
601 pCur->azVal[i] = 0;
drhac9c3d22016-06-03 01:01:57 +0000602 pCur->aLen[i] = 0;
drh724b1892016-05-31 16:22:48 +0000603 }
604}
605
606/*
607** The xConnect and xCreate methods do the same thing, but they must be
608** different so that the virtual table is not an eponymous virtual table.
609*/
610static int csvtabCreate(
611 sqlite3 *db,
612 void *pAux,
613 int argc, const char *const*argv,
614 sqlite3_vtab **ppVtab,
615 char **pzErr
616){
617 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
618}
619
620/*
621** Destructor for a CsvCursor.
622*/
623static int csvtabClose(sqlite3_vtab_cursor *cur){
624 CsvCursor *pCur = (CsvCursor*)cur;
625 csvtabCursorRowReset(pCur);
626 csv_reader_reset(&pCur->rdr);
627 sqlite3_free(cur);
628 return SQLITE_OK;
629}
630
631/*
632** Constructor for a new CsvTable cursor object.
633*/
634static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
635 CsvTable *pTab = (CsvTable*)p;
636 CsvCursor *pCur;
drhac9c3d22016-06-03 01:01:57 +0000637 size_t nByte;
638 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
drh11499f02016-07-09 16:38:25 +0000639 pCur = sqlite3_malloc64( nByte );
drh724b1892016-05-31 16:22:48 +0000640 if( pCur==0 ) return SQLITE_NOMEM;
drhac9c3d22016-06-03 01:01:57 +0000641 memset(pCur, 0, nByte);
drh724b1892016-05-31 16:22:48 +0000642 pCur->azVal = (char**)&pCur[1];
drhac9c3d22016-06-03 01:01:57 +0000643 pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
drh724b1892016-05-31 16:22:48 +0000644 *ppCursor = &pCur->base;
drhadcba642016-06-02 17:44:24 +0000645 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
drh724b1892016-05-31 16:22:48 +0000646 csv_xfer_error(pTab, &pCur->rdr);
647 return SQLITE_ERROR;
648 }
649 return SQLITE_OK;
650}
651
652
653/*
654** Advance a CsvCursor to its next row of input.
655** Set the EOF marker if we reach the end of input.
656*/
657static int csvtabNext(sqlite3_vtab_cursor *cur){
658 CsvCursor *pCur = (CsvCursor*)cur;
659 CsvTable *pTab = (CsvTable*)cur->pVtab;
660 int i = 0;
661 char *z;
drh724b1892016-05-31 16:22:48 +0000662 do{
663 z = csv_read_one_field(&pCur->rdr);
664 if( z==0 ){
665 csv_xfer_error(pTab, &pCur->rdr);
666 break;
667 }
drh724b1892016-05-31 16:22:48 +0000668 if( i<pTab->nCol ){
drhac9c3d22016-06-03 01:01:57 +0000669 if( pCur->aLen[i] < pCur->rdr.n+1 ){
drh11499f02016-07-09 16:38:25 +0000670 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
drhac9c3d22016-06-03 01:01:57 +0000671 if( zNew==0 ){
672 csv_errmsg(&pCur->rdr, "out of memory");
673 csv_xfer_error(pTab, &pCur->rdr);
674 break;
675 }
676 pCur->azVal[i] = zNew;
677 pCur->aLen[i] = pCur->rdr.n+1;
678 }
679 memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
680 i++;
drh724b1892016-05-31 16:22:48 +0000681 }
drhac9c3d22016-06-03 01:01:57 +0000682 }while( pCur->rdr.cTerm==',' );
drh4f573522017-08-08 20:03:10 +0000683 if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){
drh724b1892016-05-31 16:22:48 +0000684 pCur->iRowid = -1;
685 }else{
686 pCur->iRowid++;
drh4f573522017-08-08 20:03:10 +0000687 while( i<pTab->nCol ){
688 sqlite3_free(pCur->azVal[i]);
689 pCur->azVal[i] = 0;
690 pCur->aLen[i] = 0;
691 i++;
692 }
drh724b1892016-05-31 16:22:48 +0000693 }
694 return SQLITE_OK;
695}
696
697/*
698** Return values of columns for the row at which the CsvCursor
699** is currently pointing.
700*/
701static int csvtabColumn(
702 sqlite3_vtab_cursor *cur, /* The cursor */
703 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
704 int i /* Which column to return */
705){
706 CsvCursor *pCur = (CsvCursor*)cur;
707 CsvTable *pTab = (CsvTable*)cur->pVtab;
708 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
709 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
710 }
711 return SQLITE_OK;
712}
713
714/*
715** Return the rowid for the current row.
716*/
717static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
718 CsvCursor *pCur = (CsvCursor*)cur;
719 *pRowid = pCur->iRowid;
720 return SQLITE_OK;
721}
722
723/*
724** Return TRUE if the cursor has been moved off of the last
725** row of output.
726*/
727static int csvtabEof(sqlite3_vtab_cursor *cur){
728 CsvCursor *pCur = (CsvCursor*)cur;
729 return pCur->iRowid<0;
730}
731
732/*
733** Only a full table scan is supported. So xFilter simply rewinds to
734** the beginning.
735*/
736static int csvtabFilter(
737 sqlite3_vtab_cursor *pVtabCursor,
738 int idxNum, const char *idxStr,
739 int argc, sqlite3_value **argv
740){
741 CsvCursor *pCur = (CsvCursor*)pVtabCursor;
742 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
743 pCur->iRowid = 0;
drhadcba642016-06-02 17:44:24 +0000744 if( pCur->rdr.in==0 ){
745 assert( pCur->rdr.zIn==pTab->zData );
mistachkin80f2b332016-07-22 21:26:56 +0000746 assert( pTab->iStart>=0 );
747 assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
drhadcba642016-06-02 17:44:24 +0000748 pCur->rdr.iIn = pTab->iStart;
749 }else{
750 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
751 pCur->rdr.iIn = 0;
752 pCur->rdr.nIn = 0;
753 }
drh724b1892016-05-31 16:22:48 +0000754 return csvtabNext(pVtabCursor);
755}
756
757/*
drhadcba642016-06-02 17:44:24 +0000758** Only a forward full table scan is supported. xBestIndex is mostly
drhabfd2722016-05-31 18:08:35 +0000759** a no-op. If CSVTEST_FIDX is set, then the presence of equality
760** constraints lowers the estimated cost, which is fiction, but is useful
761** for testing certain kinds of virtual table behavior.
drh724b1892016-05-31 16:22:48 +0000762*/
763static int csvtabBestIndex(
764 sqlite3_vtab *tab,
765 sqlite3_index_info *pIdxInfo
766){
drhabfd2722016-05-31 18:08:35 +0000767 pIdxInfo->estimatedCost = 1000000;
drhac9c3d22016-06-03 01:01:57 +0000768#ifdef SQLITE_TEST
769 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
770 /* The usual (and sensible) case is to always do a full table scan.
771 ** The code in this branch only runs when testflags=1. This code
772 ** generates an artifical and unrealistic plan which is useful
773 ** for testing virtual table logic but is not helpful to real applications.
774 **
775 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
776 ** table (even though it is not) and the cost of running the virtual table
777 ** is reduced from 1 million to just 10. The constraints are *not* marked
778 ** as omittable, however, so the query planner should still generate a
779 ** plan that gives a correct answer, even if they plan is not optimal.
780 */
781 int i;
782 int nConst = 0;
783 for(i=0; i<pIdxInfo->nConstraint; i++){
784 unsigned char op;
785 if( pIdxInfo->aConstraint[i].usable==0 ) continue;
786 op = pIdxInfo->aConstraint[i].op;
787 if( op==SQLITE_INDEX_CONSTRAINT_EQ
788 || op==SQLITE_INDEX_CONSTRAINT_LIKE
789 || op==SQLITE_INDEX_CONSTRAINT_GLOB
790 ){
791 pIdxInfo->estimatedCost = 10;
792 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
793 nConst++;
794 }
drhabfd2722016-05-31 18:08:35 +0000795 }
796 }
drhac9c3d22016-06-03 01:01:57 +0000797#endif
drh724b1892016-05-31 16:22:48 +0000798 return SQLITE_OK;
799}
800
801
802static sqlite3_module CsvModule = {
803 0, /* iVersion */
804 csvtabCreate, /* xCreate */
805 csvtabConnect, /* xConnect */
806 csvtabBestIndex, /* xBestIndex */
807 csvtabDisconnect, /* xDisconnect */
808 csvtabDisconnect, /* xDestroy */
809 csvtabOpen, /* xOpen - open a cursor */
810 csvtabClose, /* xClose - close a cursor */
811 csvtabFilter, /* xFilter - configure scan constraints */
812 csvtabNext, /* xNext - advance a cursor */
813 csvtabEof, /* xEof - check for end of scan */
814 csvtabColumn, /* xColumn - read data */
815 csvtabRowid, /* xRowid - read data */
816 0, /* xUpdate */
817 0, /* xBegin */
818 0, /* xSync */
819 0, /* xCommit */
820 0, /* xRollback */
821 0, /* xFindMethod */
822 0, /* xRename */
823};
824
drhac9c3d22016-06-03 01:01:57 +0000825#ifdef SQLITE_TEST
826/*
827** For virtual table testing, make a version of the CSV virtual table
828** available that has an xUpdate function. But the xUpdate always returns
829** SQLITE_READONLY since the CSV file is not really writable.
830*/
831static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
832 return SQLITE_READONLY;
833}
834static sqlite3_module CsvModuleFauxWrite = {
835 0, /* iVersion */
836 csvtabCreate, /* xCreate */
837 csvtabConnect, /* xConnect */
838 csvtabBestIndex, /* xBestIndex */
839 csvtabDisconnect, /* xDisconnect */
840 csvtabDisconnect, /* xDestroy */
841 csvtabOpen, /* xOpen - open a cursor */
842 csvtabClose, /* xClose - close a cursor */
843 csvtabFilter, /* xFilter - configure scan constraints */
844 csvtabNext, /* xNext - advance a cursor */
845 csvtabEof, /* xEof - check for end of scan */
846 csvtabColumn, /* xColumn - read data */
847 csvtabRowid, /* xRowid - read data */
848 csvtabUpdate, /* xUpdate */
849 0, /* xBegin */
850 0, /* xSync */
851 0, /* xCommit */
852 0, /* xRollback */
853 0, /* xFindMethod */
854 0, /* xRename */
855};
856#endif /* SQLITE_TEST */
857
drheb5a5492016-07-15 02:50:18 +0000858#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
drhac9c3d22016-06-03 01:01:57 +0000859
860
drh724b1892016-05-31 16:22:48 +0000861#ifdef _WIN32
862__declspec(dllexport)
863#endif
864/*
865** This routine is called when the extension is loaded. The new
866** CSV virtual table module is registered with the calling database
867** connection.
868*/
869int sqlite3_csv_init(
870 sqlite3 *db,
871 char **pzErrMsg,
872 const sqlite3_api_routines *pApi
873){
drheb5a5492016-07-15 02:50:18 +0000874#ifndef SQLITE_OMIT_VIRTUALTABLE
drhac9c3d22016-06-03 01:01:57 +0000875 int rc;
drh724b1892016-05-31 16:22:48 +0000876 SQLITE_EXTENSION_INIT2(pApi);
drhac9c3d22016-06-03 01:01:57 +0000877 rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
878#ifdef SQLITE_TEST
879 if( rc==SQLITE_OK ){
880 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
881 }
882#endif
883 return rc;
drheb5a5492016-07-15 02:50:18 +0000884#else
885 return SQLITE_OK;
886#endif
drh724b1892016-05-31 16:22:48 +0000887}