blob: 0000797b887aea4f8c5378258430a63f9d7577dc [file] [log] [blame]
/*
** 2014-07-28
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file implements a utility program that will load many disk
** files (all files under a given directory) into an FTS table.  This is
** used for performance testing of FTS3, FTS4, and FTS5.
*/
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <ctype.h>
21#include <assert.h>
22#include <string.h>
23#include <errno.h>
24#include <dirent.h>
25#include "sqlite3.h"
26
27/*
28** Implementation of the "readtext(X)" SQL function. The entire content
29** of the file named X is read and returned as a TEXT value. It is assumed
30** the file contains UTF-8 text. NULL is returned if the file does not
31** exist or is unreadable.
32*/
33static void readfileFunc(
34 sqlite3_context *context,
35 int argc,
36 sqlite3_value **argv
37){
38 const char *zName;
39 FILE *in;
40 long nIn;
41 void *pBuf;
42
43 zName = (const char*)sqlite3_value_text(argv[0]);
44 if( zName==0 ) return;
45 in = fopen(zName, "rb");
46 if( in==0 ) return;
47 fseek(in, 0, SEEK_END);
48 nIn = ftell(in);
49 rewind(in);
50 pBuf = sqlite3_malloc( nIn );
51 if( pBuf && 1==fread(pBuf, nIn, 1, in) ){
52 sqlite3_result_text(context, pBuf, nIn, sqlite3_free);
53 }else{
54 sqlite3_free(pBuf);
55 }
56 fclose(in);
57}
58
/*
** Print usage text for this program on standard output and exit with
** status 1.  zArgv0 is the program name substituted into the "Usage:"
** line.  This function does not return.
*/
static void showHelp(const char *zArgv0){
  printf("\n"
"Usage: %s SWITCHES... DB\n"
"\n"
" This program opens the database named on the command line and attempts to\n"
" create an FTS table named \"fts\" with a single column. If successful, it\n"
" recursively traverses the directory named by the -dir option and inserts\n"
" the contents of each file into the fts table. All files are assumed to\n"
" contain UTF-8 text.\n"
"\n"
"Switches are:\n"
" -fts [345] FTS version to use (default=5)\n"
" -idx [01] Create a mapping from filename to rowid (default=0)\n"
" -dir <path> Root of directory tree to load data from (default=.)\n"
" -trans <integer> Number of inserts per transaction (default=1)\n"
" -special <cmd> FTS special command to run after creating the table"
" (may be repeated)\n"
, zArgv0
);
  exit(1);
}
81
/*
** Write "<zText>: <description of the current errno>" to standard error
** and terminate the process with exit status -1.  Does not return.
*/
static void error_out(const char *zText){
  const char *zReason = strerror(errno);   /* Text for the current errno */
  fprintf(stderr, "%s: %s\n", zText, zReason);
  exit(-1);
}
89
90/*
91** Exit with a message based on the first argument and the error message
92** currently stored in database handle db.
93*/
94static void sqlite_error_out(const char *zText, sqlite3 *db){
95 fprintf(stderr, "%s: %s\n", zText, sqlite3_errmsg(db));
96 exit(-1);
97}
98
99/*
100** Context object for visit_file().
101*/
102typedef struct VisitContext VisitContext;
103struct VisitContext {
danc7fe7a92014-08-11 19:44:52 +0000104 int nRowPerTrans;
dan92e497e2014-07-28 20:14:02 +0000105 sqlite3 *db; /* Database handle */
106 sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */
107};
108
109/*
110** Callback used with traverse(). The first argument points to an object
111** of type VisitContext. This function inserts the contents of the text
112** file zPath into the FTS table.
113*/
114void visit_file(void *pCtx, const char *zPath){
115 int rc;
116 VisitContext *p = (VisitContext*)pCtx;
117 /* printf("%s\n", zPath); */
118 sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
119 sqlite3_step(p->pInsert);
120 rc = sqlite3_reset(p->pInsert);
danc7fe7a92014-08-11 19:44:52 +0000121 if( rc!=SQLITE_OK ){
122 sqlite_error_out("insert", p->db);
123 }else if( p->nRowPerTrans>0
124 && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0
125 ){
126 sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
127 }
dan92e497e2014-07-28 20:14:02 +0000128}
129
130/*
131** Recursively traverse directory zDir. For each file that is not a
132** directory, invoke the supplied callback with its path.
133*/
134static void traverse(
135 const char *zDir, /* Directory to traverse */
136 void *pCtx, /* First argument passed to callback */
137 void (*xCallback)(void*, const char *zPath)
138){
139 DIR *d;
140 struct dirent *e;
141
142 d = opendir(zDir);
143 if( d==0 ) error_out("opendir()");
144
145 for(e=readdir(d); e; e=readdir(d)){
146 if( strcmp(e->d_name, ".")==0 || strcmp(e->d_name, "..")==0 ) continue;
147 char *zPath = sqlite3_mprintf("%s/%s", zDir, e->d_name);
148 if (e->d_type & DT_DIR) {
149 traverse(zPath, pCtx, xCallback);
150 }else{
151 xCallback(pCtx, zPath);
152 }
153 sqlite3_free(zPath);
154 }
155
156 closedir(d);
157}
158
159int main(int argc, char **argv){
160 int iFts = 5; /* Value of -fts option */
161 int bMap = 0; /* True to create mapping table */
162 const char *zDir = "."; /* Directory to scan */
163 int i;
164 int rc;
danc7fe7a92014-08-11 19:44:52 +0000165 int nRowPerTrans = 0;
dan92e497e2014-07-28 20:14:02 +0000166 sqlite3 *db;
167 char *zSql;
168 VisitContext sCtx;
169
dan6885bbc2014-08-18 19:30:01 +0000170 int nCmd = 0;
171 char **aCmd = 0;
172
dan92e497e2014-07-28 20:14:02 +0000173 if( argc % 2 ) showHelp(argv[0]);
174
175 for(i=1; i<(argc-1); i+=2){
176 char *zOpt = argv[i];
177 char *zArg = argv[i+1];
178 if( strcmp(zOpt, "-fts")==0 ){
179 iFts = atoi(zArg);
180 if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
181 }
dan6885bbc2014-08-18 19:30:01 +0000182 else if( strcmp(zOpt, "-trans")==0 ){
danc7fe7a92014-08-11 19:44:52 +0000183 nRowPerTrans = atoi(zArg);
184 }
dan92e497e2014-07-28 20:14:02 +0000185 else if( strcmp(zOpt, "-idx")==0 ){
186 bMap = atoi(zArg);
187 if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
188 }
189 else if( strcmp(zOpt, "-dir")==0 ){
190 zDir = zArg;
191 }
dan6885bbc2014-08-18 19:30:01 +0000192 else if( strcmp(zOpt, "-special")==0 ){
193 nCmd++;
194 aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd);
195 aCmd[nCmd-1] = zArg;
196 }
197 else{
198 showHelp(argv[0]);
199 }
dan92e497e2014-07-28 20:14:02 +0000200 }
201
202 /* Open the database file */
203 rc = sqlite3_open(argv[argc-1], &db);
204 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db);
205
206 rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0,
207 readfileFunc, 0, 0);
208 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db);
209
210 /* Create the FTS table */
211 zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts);
212 rc = sqlite3_exec(db, zSql, 0, 0, 0);
213 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
214 sqlite3_free(zSql);
215
dan6885bbc2014-08-18 19:30:01 +0000216 for(i=0; i<nCmd; i++){
217 zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]);
218 rc = sqlite3_exec(db, zSql, 0, 0, 0);
219 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
220 sqlite3_free(zSql);
221 }
222
dan92e497e2014-07-28 20:14:02 +0000223 /* Compile the INSERT statement to write data to the FTS table. */
224 memset(&sCtx, 0, sizeof(VisitContext));
225 sCtx.db = db;
danc7fe7a92014-08-11 19:44:52 +0000226 sCtx.nRowPerTrans = nRowPerTrans;
dan92e497e2014-07-28 20:14:02 +0000227 rc = sqlite3_prepare_v2(db,
228 "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
229 );
230 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);
231
232 /* Load all files in the directory hierarchy into the FTS table. */
danc7fe7a92014-08-11 19:44:52 +0000233 if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
dan92e497e2014-07-28 20:14:02 +0000234 traverse(zDir, (void*)&sCtx, visit_file);
danc7fe7a92014-08-11 19:44:52 +0000235 if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
dan92e497e2014-07-28 20:14:02 +0000236
237 /* Clean up and exit. */
238 sqlite3_finalize(sCtx.pInsert);
239 sqlite3_close(db);
dan6885bbc2014-08-18 19:30:01 +0000240 sqlite3_free(aCmd);
dan92e497e2014-07-28 20:14:02 +0000241 return 0;
242}