blob: f887784ab858db3b4e1402c10c73b50446cffdef [file] [log] [blame]
/*
** 2016-05-28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains the implementation of an SQLite virtual table for
** reading CSV files.
**
** Usage:
**
**    .load ./csv
**    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
**    SELECT * FROM csv;
**
** The columns are named "c0", "c1", "c2", ... by default.  But the
** application can define its own CREATE TABLE statement as an additional
** parameter.  For example:
**
**    CREATE VIRTUAL TABLE temp.csv2 USING csv(
**       filename = "../http.log",
**       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
**    );
**
** Instead of specifying a file, the text of the CSV can be loaded using
** the data= parameter.
**
** If the columns=N parameter is supplied, then the CSV file is assumed to have
** N columns.  If the columns parameter is omitted, the CSV file is opened
** as soon as the virtual table is constructed and the first row of the CSV
** is read in order to count the columns.
**
** Some extra debugging features (used for testing virtual tables) are available
** if this module is compiled with -DSQLITE_TEST.
*/
42#include <sqlite3ext.h>
43SQLITE_EXTENSION_INIT1
44#include <string.h>
45#include <stdlib.h>
46#include <assert.h>
47#include <stdarg.h>
48#include <ctype.h>
49#include <stdio.h>
50
drheb5a5492016-07-15 02:50:18 +000051#ifndef SQLITE_OMIT_VIRTUALTABLE
52
/*
** CSV_NOINLINE marks a function that the compiler should never inline.
** It expands to the GCC-style attribute when __GNUC__ is defined, to the
** MSVC declspec for _MSC_VER 1310 and later, and to nothing otherwise.
*/
#if defined(__GNUC__)
#  define CSV_NOINLINE  __attribute__((noinline))
#else
#  if defined(_MSC_VER) && _MSC_VER>=1310
#    define CSV_NOINLINE  __declspec(noinline)
#  else
#    define CSV_NOINLINE
#  endif
#endif
64
65
/* Max size of the error message in a CsvReader */
#define CSV_MXERR 200

/* Size of the CsvReader input buffer */
#define CSV_INBUFSZ 1024

/* A context object used when reading CSV text.
**
** Input comes either from the stream "in" (in which case zIn is a
** buffer of CSV_INBUFSZ bytes that is refilled from the stream) or,
** when in==0, directly from the text that zIn points at.
*/
typedef struct CsvReader CsvReader;
struct CsvReader {
  FILE *in;              /* Read the CSV text from this input stream */
  char *z;               /* Accumulated text for a field */
  int n;                 /* Number of bytes in z */
  int nAlloc;            /* Space allocated for z[] */
  int nLine;             /* Current line number */
  char cTerm;            /* Character that terminated the most recent field.
                         ** End of input is stored as (char)EOF. */
  size_t iIn;            /* Next unread character in the input buffer */
  size_t nIn;            /* Number of characters in the input buffer */
  char *zIn;             /* The input buffer */
  char zErr[CSV_MXERR];  /* Error message */
};

/* Initialize a CsvReader object.
**
** Every field is set to its empty state, including the read offset iIn
** and the terminator cTerm, so that a reader that has been reset never
** carries a stale buffer position into its next use.  Nothing is freed
** here; use csv_reader_reset() on a reader that owns resources.
*/
static void csv_reader_init(CsvReader *p){
  p->in = 0;
  p->z = 0;
  p->n = 0;
  p->nAlloc = 0;
  p->nLine = 0;
  p->cTerm = 0;
  p->iIn = 0;
  p->nIn = 0;
  p->zIn = 0;
  p->zErr[0] = 0;
}
98
99/* Close and reset a CsvReader object */
100static void csv_reader_reset(CsvReader *p){
drhadcba642016-06-02 17:44:24 +0000101 if( p->in ){
102 fclose(p->in);
103 sqlite3_free(p->zIn);
104 }
drh724b1892016-05-31 16:22:48 +0000105 sqlite3_free(p->z);
106 csv_reader_init(p);
107}
108
109/* Report an error on a CsvReader */
110static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
111 va_list ap;
112 va_start(ap, zFormat);
113 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
114 va_end(ap);
115}
116
117/* Open the file associated with a CsvReader
118** Return the number of errors.
119*/
drhadcba642016-06-02 17:44:24 +0000120static int csv_reader_open(
121 CsvReader *p, /* The reader to open */
122 const char *zFilename, /* Read from this filename */
123 const char *zData /* ... or use this data */
124){
125 if( zFilename ){
126 p->zIn = sqlite3_malloc( CSV_INBUFSZ );
127 if( p->zIn==0 ){
128 csv_errmsg(p, "out of memory");
129 return 1;
130 }
131 p->in = fopen(zFilename, "rb");
132 if( p->in==0 ){
133 csv_reader_reset(p);
134 csv_errmsg(p, "cannot open '%s' for reading", zFilename);
135 return 1;
136 }
137 }else{
138 assert( p->in==0 );
139 p->zIn = (char*)zData;
140 p->nIn = strlen(zData);
drh724b1892016-05-31 16:22:48 +0000141 }
142 return 0;
143}
144
drhadcba642016-06-02 17:44:24 +0000145/* The input buffer has overflowed. Refill the input buffer, then
146** return the next character
147*/
148static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
149 size_t got;
150
151 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
152 assert( p->in!=0 ); /* Only called if reading froma file */
153
154 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
155 if( got==0 ) return EOF;
156 p->nIn = got;
157 p->iIn = 1;
158 return p->zIn[0];
159}
160
161/* Return the next character of input. Return EOF at end of input. */
162static int csv_getc(CsvReader *p){
163 if( p->iIn >= p->nIn ){
164 if( p->in!=0 ) return csv_getc_refill(p);
165 return EOF;
166 }
167 return p->zIn[p->iIn++];
168}
169
drh724b1892016-05-31 16:22:48 +0000170/* Increase the size of p->z and append character c to the end.
171** Return 0 on success and non-zero if there is an OOM error */
172static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
173 char *zNew;
174 int nNew = p->nAlloc*2 + 100;
175 zNew = sqlite3_realloc64(p->z, nNew);
176 if( zNew ){
177 p->z = zNew;
178 p->nAlloc = nNew;
179 p->z[p->n++] = c;
180 return 0;
181 }else{
182 csv_errmsg(p, "out of memory");
183 return 1;
184 }
185}
186
187/* Append a single character to the CsvReader.z[] array.
188** Return 0 on success and non-zero if there is an OOM error */
189static int csv_append(CsvReader *p, char c){
190 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
191 p->z[p->n++] = c;
192 return 0;
193}
194
195/* Read a single field of CSV text. Compatible with rfc4180 and extended
196** with the option of having a separator other than ",".
197**
198** + Input comes from p->in.
199** + Store results in p->z of length p->n. Space to hold p->z comes
200** from sqlite3_malloc64().
201** + Keep track of the line number in p->nLine.
202** + Store the character that terminates the field in p->cTerm. Store
203** EOF on end-of-file.
204**
205** Return "" at EOF. Return 0 on an OOM error.
206*/
207static char *csv_read_one_field(CsvReader *p){
208 int c;
209 p->n = 0;
drhadcba642016-06-02 17:44:24 +0000210 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000211 if( c==EOF ){
212 p->cTerm = EOF;
213 return "";
214 }
215 if( c=='"' ){
216 int pc, ppc;
217 int startLine = p->nLine;
drh724b1892016-05-31 16:22:48 +0000218 pc = ppc = 0;
219 while( 1 ){
drhadcba642016-06-02 17:44:24 +0000220 c = csv_getc(p);
drhac9c3d22016-06-03 01:01:57 +0000221 if( c<='"' || pc=='"' ){
222 if( c=='\n' ) p->nLine++;
223 if( c=='"' ){
224 if( pc=='"' ){
225 pc = 0;
226 continue;
227 }
drh724b1892016-05-31 16:22:48 +0000228 }
drhac9c3d22016-06-03 01:01:57 +0000229 if( (c==',' && pc=='"')
230 || (c=='\n' && pc=='"')
231 || (c=='\n' && pc=='\r' && ppc=='"')
232 || (c==EOF && pc=='"')
233 ){
234 do{ p->n--; }while( p->z[p->n]!='"' );
mistachkin80f2b332016-07-22 21:26:56 +0000235 p->cTerm = (char)c;
drhac9c3d22016-06-03 01:01:57 +0000236 break;
237 }
238 if( pc=='"' && c!='\r' ){
239 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
240 break;
241 }
242 if( c==EOF ){
243 csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
244 startLine, '"');
mistachkin80f2b332016-07-22 21:26:56 +0000245 p->cTerm = (char)c;
drhac9c3d22016-06-03 01:01:57 +0000246 break;
247 }
drh724b1892016-05-31 16:22:48 +0000248 }
249 if( csv_append(p, (char)c) ) return 0;
250 ppc = pc;
251 pc = c;
252 }
253 }else{
drhac9c3d22016-06-03 01:01:57 +0000254 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
drh724b1892016-05-31 16:22:48 +0000255 if( csv_append(p, (char)c) ) return 0;
drhadcba642016-06-02 17:44:24 +0000256 c = csv_getc(p);
drh724b1892016-05-31 16:22:48 +0000257 }
258 if( c=='\n' ){
259 p->nLine++;
260 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
261 }
mistachkin80f2b332016-07-22 21:26:56 +0000262 p->cTerm = (char)c;
drh724b1892016-05-31 16:22:48 +0000263 }
264 if( p->z ) p->z[p->n] = 0;
265 return p->z;
266}
267
268
269/* Forward references to the various virtual table methods implemented
270** in this file. */
271static int csvtabCreate(sqlite3*, void*, int, const char*const*,
272 sqlite3_vtab**,char**);
273static int csvtabConnect(sqlite3*, void*, int, const char*const*,
274 sqlite3_vtab**,char**);
275static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
276static int csvtabDisconnect(sqlite3_vtab*);
277static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
278static int csvtabClose(sqlite3_vtab_cursor*);
279static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
280 int argc, sqlite3_value **argv);
281static int csvtabNext(sqlite3_vtab_cursor*);
282static int csvtabEof(sqlite3_vtab_cursor*);
283static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
284static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
285
286/* An instance of the CSV virtual table */
287typedef struct CsvTable {
288 sqlite3_vtab base; /* Base class. Must be first */
289 char *zFilename; /* Name of the CSV file */
drhadcba642016-06-02 17:44:24 +0000290 char *zData; /* Raw CSV data in lieu of zFilename */
drh724b1892016-05-31 16:22:48 +0000291 long iStart; /* Offset to start of data in zFilename */
292 int nCol; /* Number of columns in the CSV file */
drhabfd2722016-05-31 18:08:35 +0000293 unsigned int tstFlags; /* Bit values used for testing */
drh724b1892016-05-31 16:22:48 +0000294} CsvTable;
295
drhabfd2722016-05-31 18:08:35 +0000296/* Allowed values for tstFlags */
297#define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
298
drh724b1892016-05-31 16:22:48 +0000299/* A cursor for the CSV virtual table */
300typedef struct CsvCursor {
301 sqlite3_vtab_cursor base; /* Base class. Must be first */
302 CsvReader rdr; /* The CsvReader object */
303 char **azVal; /* Value of the current row */
drhac9c3d22016-06-03 01:01:57 +0000304 int *aLen; /* Length of each entry */
drh724b1892016-05-31 16:22:48 +0000305 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
306} CsvCursor;
307
308/* Transfer error message text from a reader into a CsvTable */
309static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
310 sqlite3_free(pTab->base.zErrMsg);
311 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
312}
313
314/*
315** This method is the destructor fo a CsvTable object.
316*/
317static int csvtabDisconnect(sqlite3_vtab *pVtab){
318 CsvTable *p = (CsvTable*)pVtab;
319 sqlite3_free(p->zFilename);
drh35db31b2016-06-02 23:13:21 +0000320 sqlite3_free(p->zData);
drh724b1892016-05-31 16:22:48 +0000321 sqlite3_free(p);
322 return SQLITE_OK;
323}
324
/* Advance past any whitespace at the start of z.  The result points at
** the first non-whitespace character, or at the zero terminator when
** the string contains nothing but whitespace. */
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zCur = (const unsigned char*)z;
  for(; isspace(*zCur); zCur++){}
  return (const char*)zCur;
}
331
/* Remove trailing whitespace from the end of string z[], in place.
**
** The character examined is z[n-1], the last character of the string;
** z[n] is the zero terminator and is never whitespace.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
338
/* Remove quoting from string z[], in place.
**
** Nothing happens unless z[] begins with ' or " and ends with that same
** character.  Otherwise the outer quotes are dropped and each doubled
** quote character inside is reduced to a single one.
*/
static void csv_dequote(char *z){
  const char q = z[0];     /* The quote character in use */
  size_t len;              /* Length of the quoted string */
  size_t src;              /* Next character to read */
  size_t dst = 0;          /* Next position to write */

  if( q!='\'' && q!='"' ) return;
  len = strlen(z);
  if( len<2 ) return;
  if( z[len-1]!=q ) return;
  src = 1;
  while( src<len-1 ){
    if( z[src]==q && z[src+1]==q ) src++;
    z[dst++] = z[src];
    src++;
  }
  z[dst] = 0;
}
354
/* Test whether z has the form "TAG = VALUE", where whitespace is
** allowed before TAG and on either side of the "=".  zTag holds the
** tag text and nTag its length in bytes.
**
** Return a pointer to the first character of VALUE on a match, or
** NULL if z does not have this form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur+1);
    }
  }
  return 0;
}
366
drhadcba642016-06-02 17:44:24 +0000367/* Decode a parameter that requires a dequoted string.
368**
369** Return 1 if the parameter is seen, or 0 if not. 1 is returned
370** even if there is an error. If an error occurs, then an error message
371** is left in p->zErr. If there are no errors, p->zErr[0]==0.
372*/
373static int csv_string_parameter(
374 CsvReader *p, /* Leave the error message here, if there is one */
375 const char *zParam, /* Parameter we are checking for */
376 const char *zArg, /* Raw text of the virtual table argment */
377 char **pzVal /* Write the dequoted string value here */
378){
379 const char *zValue;
drh11499f02016-07-09 16:38:25 +0000380 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
drhadcba642016-06-02 17:44:24 +0000381 if( zValue==0 ) return 0;
382 p->zErr[0] = 0;
383 if( *pzVal ){
384 csv_errmsg(p, "more than one '%s' parameter", zParam);
385 return 1;
386 }
387 *pzVal = sqlite3_mprintf("%s", zValue);
388 if( *pzVal==0 ){
389 csv_errmsg(p, "out of memory");
390 return 1;
391 }
392 csv_trim_whitespace(*pzVal);
393 csv_dequote(*pzVal);
394 return 1;
395}
396
397
/* Interpret z as a boolean.  Return 1 for "yes", "on", "true" (compared
** with sqlite3_stricmp) or "1"; return 0 for "no", "off", "false" or
** "0"; return -1 for anything else.
*/
static int csv_boolean(const char *z){
  static const char *const azTrue[] = { "yes", "on", "true" };
  static const char *const azFalse[] = { "no", "off", "false" };
  unsigned int k;

  for(k=0; k<sizeof(azTrue)/sizeof(azTrue[0]); k++){
    if( sqlite3_stricmp(azTrue[k], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;
  for(k=0; k<sizeof(azFalse)/sizeof(azFalse[0]); k++){
    if( sqlite3_stricmp(azFalse[k], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;
  return -1;
}
418
419
420/*
421** Parameters:
drhadcba642016-06-02 17:44:24 +0000422** filename=FILENAME Name of file containing CSV content
423** data=TEXT Direct CSV content.
drh1fc1a0f2016-05-31 18:44:33 +0000424** schema=SCHEMA Alternative CSV schema.
drh724b1892016-05-31 16:22:48 +0000425** header=YES|NO First row of CSV defines the names of
426** columns if "yes". Default "no".
drhadcba642016-06-02 17:44:24 +0000427** columns=N Assume the CSV file contains N columns.
drhac9c3d22016-06-03 01:01:57 +0000428**
429** Only available if compiled with SQLITE_TEST:
430**
drhabfd2722016-05-31 18:08:35 +0000431** testflags=N Bitmask of test flags. Optional
drh724b1892016-05-31 16:22:48 +0000432**
drh1fc1a0f2016-05-31 18:44:33 +0000433** If schema= is omitted, then the columns are named "c0", "c1", "c2",
434** and so forth. If columns=N is omitted, then the file is opened and
435** the number of columns in the first row is counted to determine the
436** column count. If header=YES, then the first row is skipped.
drh724b1892016-05-31 16:22:48 +0000437*/
438static int csvtabConnect(
439 sqlite3 *db,
440 void *pAux,
441 int argc, const char *const*argv,
442 sqlite3_vtab **ppVtab,
443 char **pzErr
444){
drh1fc1a0f2016-05-31 18:44:33 +0000445 CsvTable *pNew = 0; /* The CsvTable object to construct */
446 int bHeader = -1; /* header= flags. -1 means not seen yet */
447 int rc = SQLITE_OK; /* Result code from this routine */
drhadcba642016-06-02 17:44:24 +0000448 int i, j; /* Loop counters */
drhac9c3d22016-06-03 01:01:57 +0000449#ifdef SQLITE_TEST
drhadcba642016-06-02 17:44:24 +0000450 int tstFlags = 0; /* Value for testflags=N parameter */
drhac9c3d22016-06-03 01:01:57 +0000451#endif
drh1fc1a0f2016-05-31 18:44:33 +0000452 int nCol = -99; /* Value of the columns= parameter */
453 CsvReader sRdr; /* A CSV file reader used to store an error
454 ** message and/or to count the number of columns */
drhadcba642016-06-02 17:44:24 +0000455 static const char *azParam[] = {
456 "filename", "data", "schema",
457 };
458 char *azPValue[3]; /* Parameter values */
459# define CSV_FILENAME (azPValue[0])
460# define CSV_DATA (azPValue[1])
461# define CSV_SCHEMA (azPValue[2])
drh724b1892016-05-31 16:22:48 +0000462
drhadcba642016-06-02 17:44:24 +0000463
464 assert( sizeof(azPValue)==sizeof(azParam) );
drh724b1892016-05-31 16:22:48 +0000465 memset(&sRdr, 0, sizeof(sRdr));
drhadcba642016-06-02 17:44:24 +0000466 memset(azPValue, 0, sizeof(azPValue));
drh724b1892016-05-31 16:22:48 +0000467 for(i=3; i<argc; i++){
468 const char *z = argv[i];
469 const char *zValue;
drhadcba642016-06-02 17:44:24 +0000470 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
471 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
472 }
473 if( j<sizeof(azParam)/sizeof(azParam[0]) ){
474 if( sRdr.zErr[0] ) goto csvtab_connect_error;
drh724b1892016-05-31 16:22:48 +0000475 }else
476 if( (zValue = csv_parameter("header",6,z))!=0 ){
477 int x;
478 if( bHeader>=0 ){
479 csv_errmsg(&sRdr, "more than one 'header' parameter");
480 goto csvtab_connect_error;
481 }
482 x = csv_boolean(zValue);
483 if( x==1 ){
484 bHeader = 1;
485 }else if( x==0 ){
486 bHeader = 0;
487 }else{
488 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
489 goto csvtab_connect_error;
490 }
491 }else
drhac9c3d22016-06-03 01:01:57 +0000492#ifdef SQLITE_TEST
drhabfd2722016-05-31 18:08:35 +0000493 if( (zValue = csv_parameter("testflags",9,z))!=0 ){
494 tstFlags = (unsigned int)atoi(zValue);
495 }else
drhac9c3d22016-06-03 01:01:57 +0000496#endif
drh1fc1a0f2016-05-31 18:44:33 +0000497 if( (zValue = csv_parameter("columns",7,z))!=0 ){
498 if( nCol>0 ){
499 csv_errmsg(&sRdr, "more than one 'columns' parameter");
500 goto csvtab_connect_error;
501 }
502 nCol = atoi(zValue);
503 if( nCol<=0 ){
504 csv_errmsg(&sRdr, "must have at least one column");
505 goto csvtab_connect_error;
506 }
507 }else
drh724b1892016-05-31 16:22:48 +0000508 {
509 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
510 goto csvtab_connect_error;
511 }
512 }
drhadcba642016-06-02 17:44:24 +0000513 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
514 csv_errmsg(&sRdr, "must either filename= or data= but not both");
drh724b1892016-05-31 16:22:48 +0000515 goto csvtab_connect_error;
516 }
drhadcba642016-06-02 17:44:24 +0000517 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
drh724b1892016-05-31 16:22:48 +0000518 goto csvtab_connect_error;
519 }
520 pNew = sqlite3_malloc( sizeof(*pNew) );
521 *ppVtab = (sqlite3_vtab*)pNew;
522 if( pNew==0 ) goto csvtab_connect_oom;
523 memset(pNew, 0, sizeof(*pNew));
drh1fc1a0f2016-05-31 18:44:33 +0000524 if( nCol>0 ){
525 pNew->nCol = nCol;
526 }else{
527 do{
528 const char *z = csv_read_one_field(&sRdr);
529 if( z==0 ) goto csvtab_connect_oom;
530 pNew->nCol++;
531 }while( sRdr.cTerm==',' );
532 }
drhadcba642016-06-02 17:44:24 +0000533 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
534 pNew->zData = CSV_DATA; CSV_DATA = 0;
drhac9c3d22016-06-03 01:01:57 +0000535#ifdef SQLITE_TEST
drhabfd2722016-05-31 18:08:35 +0000536 pNew->tstFlags = tstFlags;
drhac9c3d22016-06-03 01:01:57 +0000537#endif
drh724b1892016-05-31 16:22:48 +0000538 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
539 csv_reader_reset(&sRdr);
drhadcba642016-06-02 17:44:24 +0000540 if( CSV_SCHEMA==0 ){
drh724b1892016-05-31 16:22:48 +0000541 char *zSep = "";
drhadcba642016-06-02 17:44:24 +0000542 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
543 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
drh724b1892016-05-31 16:22:48 +0000544 for(i=0; i<pNew->nCol; i++){
drhadcba642016-06-02 17:44:24 +0000545 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
drh724b1892016-05-31 16:22:48 +0000546 zSep = ",";
547 }
drhadcba642016-06-02 17:44:24 +0000548 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000549 }
drhadcba642016-06-02 17:44:24 +0000550 rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
drh724b1892016-05-31 16:22:48 +0000551 if( rc ) goto csvtab_connect_error;
drhadcba642016-06-02 17:44:24 +0000552 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
553 sqlite3_free(azPValue[i]);
554 }
drh724b1892016-05-31 16:22:48 +0000555 return SQLITE_OK;
556
557csvtab_connect_oom:
558 rc = SQLITE_NOMEM;
559 csv_errmsg(&sRdr, "out of memory");
560
561csvtab_connect_error:
562 if( pNew ) csvtabDisconnect(&pNew->base);
drhadcba642016-06-02 17:44:24 +0000563 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
564 sqlite3_free(azPValue[i]);
565 }
drh724b1892016-05-31 16:22:48 +0000566 if( sRdr.zErr[0] ){
567 sqlite3_free(*pzErr);
568 *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
569 }
570 csv_reader_reset(&sRdr);
drhabfd2722016-05-31 18:08:35 +0000571 if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
drh724b1892016-05-31 16:22:48 +0000572 return rc;
573}
574
575/*
576** Reset the current row content held by a CsvCursor.
577*/
578static void csvtabCursorRowReset(CsvCursor *pCur){
579 CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
580 int i;
581 for(i=0; i<pTab->nCol; i++){
582 sqlite3_free(pCur->azVal[i]);
583 pCur->azVal[i] = 0;
drhac9c3d22016-06-03 01:01:57 +0000584 pCur->aLen[i] = 0;
drh724b1892016-05-31 16:22:48 +0000585 }
586}
587
588/*
589** The xConnect and xCreate methods do the same thing, but they must be
590** different so that the virtual table is not an eponymous virtual table.
591*/
592static int csvtabCreate(
593 sqlite3 *db,
594 void *pAux,
595 int argc, const char *const*argv,
596 sqlite3_vtab **ppVtab,
597 char **pzErr
598){
599 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
600}
601
602/*
603** Destructor for a CsvCursor.
604*/
605static int csvtabClose(sqlite3_vtab_cursor *cur){
606 CsvCursor *pCur = (CsvCursor*)cur;
607 csvtabCursorRowReset(pCur);
608 csv_reader_reset(&pCur->rdr);
609 sqlite3_free(cur);
610 return SQLITE_OK;
611}
612
613/*
614** Constructor for a new CsvTable cursor object.
615*/
616static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
617 CsvTable *pTab = (CsvTable*)p;
618 CsvCursor *pCur;
drhac9c3d22016-06-03 01:01:57 +0000619 size_t nByte;
620 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
drh11499f02016-07-09 16:38:25 +0000621 pCur = sqlite3_malloc64( nByte );
drh724b1892016-05-31 16:22:48 +0000622 if( pCur==0 ) return SQLITE_NOMEM;
drhac9c3d22016-06-03 01:01:57 +0000623 memset(pCur, 0, nByte);
drh724b1892016-05-31 16:22:48 +0000624 pCur->azVal = (char**)&pCur[1];
drhac9c3d22016-06-03 01:01:57 +0000625 pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
drh724b1892016-05-31 16:22:48 +0000626 *ppCursor = &pCur->base;
drhadcba642016-06-02 17:44:24 +0000627 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
drh724b1892016-05-31 16:22:48 +0000628 csv_xfer_error(pTab, &pCur->rdr);
629 return SQLITE_ERROR;
630 }
631 return SQLITE_OK;
632}
633
634
635/*
636** Advance a CsvCursor to its next row of input.
637** Set the EOF marker if we reach the end of input.
638*/
639static int csvtabNext(sqlite3_vtab_cursor *cur){
640 CsvCursor *pCur = (CsvCursor*)cur;
641 CsvTable *pTab = (CsvTable*)cur->pVtab;
642 int i = 0;
643 char *z;
drh724b1892016-05-31 16:22:48 +0000644 do{
645 z = csv_read_one_field(&pCur->rdr);
646 if( z==0 ){
647 csv_xfer_error(pTab, &pCur->rdr);
648 break;
649 }
drh724b1892016-05-31 16:22:48 +0000650 if( i<pTab->nCol ){
drhac9c3d22016-06-03 01:01:57 +0000651 if( pCur->aLen[i] < pCur->rdr.n+1 ){
drh11499f02016-07-09 16:38:25 +0000652 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
drhac9c3d22016-06-03 01:01:57 +0000653 if( zNew==0 ){
654 csv_errmsg(&pCur->rdr, "out of memory");
655 csv_xfer_error(pTab, &pCur->rdr);
656 break;
657 }
658 pCur->azVal[i] = zNew;
659 pCur->aLen[i] = pCur->rdr.n+1;
660 }
661 memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
662 i++;
drh724b1892016-05-31 16:22:48 +0000663 }
drhac9c3d22016-06-03 01:01:57 +0000664 }while( pCur->rdr.cTerm==',' );
665 while( i<pTab->nCol ){
666 sqlite3_free(pCur->azVal[i]);
667 pCur->azVal[i] = 0;
668 pCur->aLen[i] = 0;
669 i++;
670 }
drh724b1892016-05-31 16:22:48 +0000671 if( z==0 || pCur->rdr.cTerm==EOF ){
672 pCur->iRowid = -1;
673 }else{
674 pCur->iRowid++;
675 }
676 return SQLITE_OK;
677}
678
679/*
680** Return values of columns for the row at which the CsvCursor
681** is currently pointing.
682*/
683static int csvtabColumn(
684 sqlite3_vtab_cursor *cur, /* The cursor */
685 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
686 int i /* Which column to return */
687){
688 CsvCursor *pCur = (CsvCursor*)cur;
689 CsvTable *pTab = (CsvTable*)cur->pVtab;
690 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
691 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
692 }
693 return SQLITE_OK;
694}
695
696/*
697** Return the rowid for the current row.
698*/
699static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
700 CsvCursor *pCur = (CsvCursor*)cur;
701 *pRowid = pCur->iRowid;
702 return SQLITE_OK;
703}
704
705/*
706** Return TRUE if the cursor has been moved off of the last
707** row of output.
708*/
709static int csvtabEof(sqlite3_vtab_cursor *cur){
710 CsvCursor *pCur = (CsvCursor*)cur;
711 return pCur->iRowid<0;
712}
713
714/*
715** Only a full table scan is supported. So xFilter simply rewinds to
716** the beginning.
717*/
718static int csvtabFilter(
719 sqlite3_vtab_cursor *pVtabCursor,
720 int idxNum, const char *idxStr,
721 int argc, sqlite3_value **argv
722){
723 CsvCursor *pCur = (CsvCursor*)pVtabCursor;
724 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
725 pCur->iRowid = 0;
drhadcba642016-06-02 17:44:24 +0000726 if( pCur->rdr.in==0 ){
727 assert( pCur->rdr.zIn==pTab->zData );
mistachkin80f2b332016-07-22 21:26:56 +0000728 assert( pTab->iStart>=0 );
729 assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
drhadcba642016-06-02 17:44:24 +0000730 pCur->rdr.iIn = pTab->iStart;
731 }else{
732 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
733 pCur->rdr.iIn = 0;
734 pCur->rdr.nIn = 0;
735 }
drh724b1892016-05-31 16:22:48 +0000736 return csvtabNext(pVtabCursor);
737}
738
739/*
drhadcba642016-06-02 17:44:24 +0000740** Only a forward full table scan is supported. xBestIndex is mostly
drhabfd2722016-05-31 18:08:35 +0000741** a no-op. If CSVTEST_FIDX is set, then the presence of equality
742** constraints lowers the estimated cost, which is fiction, but is useful
743** for testing certain kinds of virtual table behavior.
drh724b1892016-05-31 16:22:48 +0000744*/
745static int csvtabBestIndex(
746 sqlite3_vtab *tab,
747 sqlite3_index_info *pIdxInfo
748){
drhabfd2722016-05-31 18:08:35 +0000749 pIdxInfo->estimatedCost = 1000000;
drhac9c3d22016-06-03 01:01:57 +0000750#ifdef SQLITE_TEST
751 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
752 /* The usual (and sensible) case is to always do a full table scan.
753 ** The code in this branch only runs when testflags=1. This code
754 ** generates an artifical and unrealistic plan which is useful
755 ** for testing virtual table logic but is not helpful to real applications.
756 **
757 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
758 ** table (even though it is not) and the cost of running the virtual table
759 ** is reduced from 1 million to just 10. The constraints are *not* marked
760 ** as omittable, however, so the query planner should still generate a
761 ** plan that gives a correct answer, even if they plan is not optimal.
762 */
763 int i;
764 int nConst = 0;
765 for(i=0; i<pIdxInfo->nConstraint; i++){
766 unsigned char op;
767 if( pIdxInfo->aConstraint[i].usable==0 ) continue;
768 op = pIdxInfo->aConstraint[i].op;
769 if( op==SQLITE_INDEX_CONSTRAINT_EQ
770 || op==SQLITE_INDEX_CONSTRAINT_LIKE
771 || op==SQLITE_INDEX_CONSTRAINT_GLOB
772 ){
773 pIdxInfo->estimatedCost = 10;
774 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
775 nConst++;
776 }
drhabfd2722016-05-31 18:08:35 +0000777 }
778 }
drhac9c3d22016-06-03 01:01:57 +0000779#endif
drh724b1892016-05-31 16:22:48 +0000780 return SQLITE_OK;
781}
782
783
784static sqlite3_module CsvModule = {
785 0, /* iVersion */
786 csvtabCreate, /* xCreate */
787 csvtabConnect, /* xConnect */
788 csvtabBestIndex, /* xBestIndex */
789 csvtabDisconnect, /* xDisconnect */
790 csvtabDisconnect, /* xDestroy */
791 csvtabOpen, /* xOpen - open a cursor */
792 csvtabClose, /* xClose - close a cursor */
793 csvtabFilter, /* xFilter - configure scan constraints */
794 csvtabNext, /* xNext - advance a cursor */
795 csvtabEof, /* xEof - check for end of scan */
796 csvtabColumn, /* xColumn - read data */
797 csvtabRowid, /* xRowid - read data */
798 0, /* xUpdate */
799 0, /* xBegin */
800 0, /* xSync */
801 0, /* xCommit */
802 0, /* xRollback */
803 0, /* xFindMethod */
804 0, /* xRename */
805};
806
#ifdef SQLITE_TEST
/*
** Test-only variant of the CSV virtual table that has an xUpdate
** method.  The method refuses every change by returning
** SQLITE_READONLY, because the CSV file is not really writable.
*/
static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
  return SQLITE_READONLY;
}

/* Same method table as CsvModule except that xUpdate is provided */
static sqlite3_module CsvModuleFauxWrite = {
  0,                       /* iVersion */
  csvtabCreate,            /* xCreate */
  csvtabConnect,           /* xConnect */
  csvtabBestIndex,         /* xBestIndex */
  csvtabDisconnect,        /* xDisconnect */
  csvtabDisconnect,        /* xDestroy */
  csvtabOpen,              /* xOpen - open a cursor */
  csvtabClose,             /* xClose - close a cursor */
  csvtabFilter,            /* xFilter - configure scan constraints */
  csvtabNext,              /* xNext - advance a cursor */
  csvtabEof,               /* xEof - check for end of scan */
  csvtabColumn,            /* xColumn - read data */
  csvtabRowid,             /* xRowid - read data */
  csvtabUpdate,            /* xUpdate - always fails with SQLITE_READONLY */
  0,                       /* xBegin */
  0,                       /* xSync */
  0,                       /* xCommit */
  0,                       /* xRollback */
  0,                       /* xFindMethod */
  0,                       /* xRename */
};
#endif /* SQLITE_TEST */
839
drheb5a5492016-07-15 02:50:18 +0000840#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
drhac9c3d22016-06-03 01:01:57 +0000841
842
drh724b1892016-05-31 16:22:48 +0000843#ifdef _WIN32
844__declspec(dllexport)
845#endif
846/*
847** This routine is called when the extension is loaded. The new
848** CSV virtual table module is registered with the calling database
849** connection.
850*/
851int sqlite3_csv_init(
852 sqlite3 *db,
853 char **pzErrMsg,
854 const sqlite3_api_routines *pApi
855){
drheb5a5492016-07-15 02:50:18 +0000856#ifndef SQLITE_OMIT_VIRTUALTABLE
drhac9c3d22016-06-03 01:01:57 +0000857 int rc;
drh724b1892016-05-31 16:22:48 +0000858 SQLITE_EXTENSION_INIT2(pApi);
drhac9c3d22016-06-03 01:01:57 +0000859 rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
860#ifdef SQLITE_TEST
861 if( rc==SQLITE_OK ){
862 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
863 }
864#endif
865 return rc;
drheb5a5492016-07-15 02:50:18 +0000866#else
867 return SQLITE_OK;
868#endif
drh724b1892016-05-31 16:22:48 +0000869}