dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 1 | /* |
drh | fcd4325 | 2015-07-13 23:55:08 +0000 | [diff] [blame] | 2 | ** 2014-07-28 |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 3 | ** |
| 4 | ** The author disclaims copyright to this source code. In place of |
| 5 | ** a legal notice, here is a blessing: |
| 6 | ** |
| 7 | ** May you do good and not evil. |
| 8 | ** May you find forgiveness for yourself and forgive others. |
| 9 | ** May you share freely, never taking more than you give. |
| 10 | ** |
| 11 | ************************************************************************* |
drh | 3547c51 | 2015-07-13 18:01:14 +0000 | [diff] [blame] | 12 | ** |
| 13 | ** This file implements a utility program that will load many disk |
| 14 | ** files (all files under a given directory) into an FTS table. This is |
| 15 | ** used for performance testing of FTS3, FTS4, and FTS5. |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 16 | */ |
| 17 | |
| 18 | #include <stdio.h> |
| 19 | #include <stdlib.h> |
| 20 | #include <ctype.h> |
| 21 | #include <assert.h> |
| 22 | #include <string.h> |
| 23 | #include <errno.h> |
| 24 | #include <dirent.h> |
| 25 | #include "sqlite3.h" |
| 26 | |
| 27 | /* |
| 28 | ** Implementation of the "readtext(X)" SQL function. The entire content |
| 29 | ** of the file named X is read and returned as a TEXT value. It is assumed |
| 30 | ** the file contains UTF-8 text. NULL is returned if the file does not |
| 31 | ** exist or is unreadable. |
| 32 | */ |
| 33 | static void readfileFunc( |
| 34 | sqlite3_context *context, |
| 35 | int argc, |
| 36 | sqlite3_value **argv |
| 37 | ){ |
| 38 | const char *zName; |
| 39 | FILE *in; |
| 40 | long nIn; |
| 41 | void *pBuf; |
| 42 | |
| 43 | zName = (const char*)sqlite3_value_text(argv[0]); |
| 44 | if( zName==0 ) return; |
| 45 | in = fopen(zName, "rb"); |
| 46 | if( in==0 ) return; |
| 47 | fseek(in, 0, SEEK_END); |
| 48 | nIn = ftell(in); |
| 49 | rewind(in); |
| 50 | pBuf = sqlite3_malloc( nIn ); |
| 51 | if( pBuf && 1==fread(pBuf, nIn, 1, in) ){ |
| 52 | sqlite3_result_text(context, pBuf, nIn, sqlite3_free); |
| 53 | }else{ |
| 54 | sqlite3_free(pBuf); |
| 55 | } |
| 56 | fclose(in); |
| 57 | } |
| 58 | |
/*
** Print usage text for this program to stdout and exit with status 1.
** Argument zArgv0 is substituted into the text as the program name.
**
** The text lists every switch accepted by main(), including -special.
*/
static void showHelp(const char *zArgv0){
  printf("\n"
"Usage: %s SWITCHES... DB\n"
"\n"
" This program opens the database named on the command line and attempts to\n"
" create an FTS table named \"fts\" with a single column. If successful, it\n"
" recursively traverses the directory named by the -dir option and inserts\n"
" the contents of each file into the fts table. All files are assumed to\n"
" contain UTF-8 text.\n"
"\n"
"Switches are:\n"
" -fts [345] FTS version to use (default=5)\n"
" -idx [01] Create a mapping from filename to rowid (default=0)\n"
" -dir <path> Root of directory tree to load data from (default=.)\n"
" -trans <integer> Number of inserts per transaction (default=1)\n"
" -special <cmd> FTS special command to run after creating the table\n"
, zArgv0
  );
  exit(1);
}
| 81 | |
/*
** Write "<zText>: <description of errno>" to stderr, then terminate the
** process with exit(-1). This function does not return.
*/
static void error_out(const char *zText){
  /* Look up the message for the errno value in effect on entry. */
  const char *zErr = strerror(errno);

  fprintf(stderr, "%s: %s\n", zText, zErr);
  exit(-1);
}
| 89 | |
| 90 | /* |
| 91 | ** Exit with a message based on the first argument and the error message |
| 92 | ** currently stored in database handle db. |
| 93 | */ |
| 94 | static void sqlite_error_out(const char *zText, sqlite3 *db){ |
| 95 | fprintf(stderr, "%s: %s\n", zText, sqlite3_errmsg(db)); |
| 96 | exit(-1); |
| 97 | } |
| 98 | |
| 99 | /* |
| 100 | ** Context object for visit_file(). |
| 101 | */ |
| 102 | typedef struct VisitContext VisitContext; |
| 103 | struct VisitContext { |
dan | c7fe7a9 | 2014-08-11 19:44:52 +0000 | [diff] [blame] | 104 | int nRowPerTrans; |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 105 | sqlite3 *db; /* Database handle */ |
| 106 | sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */ |
| 107 | }; |
| 108 | |
| 109 | /* |
| 110 | ** Callback used with traverse(). The first argument points to an object |
| 111 | ** of type VisitContext. This function inserts the contents of the text |
| 112 | ** file zPath into the FTS table. |
| 113 | */ |
| 114 | void visit_file(void *pCtx, const char *zPath){ |
| 115 | int rc; |
| 116 | VisitContext *p = (VisitContext*)pCtx; |
| 117 | /* printf("%s\n", zPath); */ |
| 118 | sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC); |
| 119 | sqlite3_step(p->pInsert); |
| 120 | rc = sqlite3_reset(p->pInsert); |
dan | c7fe7a9 | 2014-08-11 19:44:52 +0000 | [diff] [blame] | 121 | if( rc!=SQLITE_OK ){ |
| 122 | sqlite_error_out("insert", p->db); |
| 123 | }else if( p->nRowPerTrans>0 |
| 124 | && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0 |
| 125 | ){ |
| 126 | sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0); |
| 127 | } |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | /* |
| 131 | ** Recursively traverse directory zDir. For each file that is not a |
| 132 | ** directory, invoke the supplied callback with its path. |
| 133 | */ |
| 134 | static void traverse( |
| 135 | const char *zDir, /* Directory to traverse */ |
| 136 | void *pCtx, /* First argument passed to callback */ |
| 137 | void (*xCallback)(void*, const char *zPath) |
| 138 | ){ |
| 139 | DIR *d; |
| 140 | struct dirent *e; |
| 141 | |
| 142 | d = opendir(zDir); |
| 143 | if( d==0 ) error_out("opendir()"); |
| 144 | |
| 145 | for(e=readdir(d); e; e=readdir(d)){ |
| 146 | if( strcmp(e->d_name, ".")==0 || strcmp(e->d_name, "..")==0 ) continue; |
| 147 | char *zPath = sqlite3_mprintf("%s/%s", zDir, e->d_name); |
| 148 | if (e->d_type & DT_DIR) { |
| 149 | traverse(zPath, pCtx, xCallback); |
| 150 | }else{ |
| 151 | xCallback(pCtx, zPath); |
| 152 | } |
| 153 | sqlite3_free(zPath); |
| 154 | } |
| 155 | |
| 156 | closedir(d); |
| 157 | } |
| 158 | |
| 159 | int main(int argc, char **argv){ |
| 160 | int iFts = 5; /* Value of -fts option */ |
| 161 | int bMap = 0; /* True to create mapping table */ |
| 162 | const char *zDir = "."; /* Directory to scan */ |
| 163 | int i; |
| 164 | int rc; |
dan | c7fe7a9 | 2014-08-11 19:44:52 +0000 | [diff] [blame] | 165 | int nRowPerTrans = 0; |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 166 | sqlite3 *db; |
| 167 | char *zSql; |
| 168 | VisitContext sCtx; |
| 169 | |
dan | 6885bbc | 2014-08-18 19:30:01 +0000 | [diff] [blame] | 170 | int nCmd = 0; |
| 171 | char **aCmd = 0; |
| 172 | |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 173 | if( argc % 2 ) showHelp(argv[0]); |
| 174 | |
| 175 | for(i=1; i<(argc-1); i+=2){ |
| 176 | char *zOpt = argv[i]; |
| 177 | char *zArg = argv[i+1]; |
| 178 | if( strcmp(zOpt, "-fts")==0 ){ |
| 179 | iFts = atoi(zArg); |
| 180 | if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); |
| 181 | } |
dan | 6885bbc | 2014-08-18 19:30:01 +0000 | [diff] [blame] | 182 | else if( strcmp(zOpt, "-trans")==0 ){ |
dan | c7fe7a9 | 2014-08-11 19:44:52 +0000 | [diff] [blame] | 183 | nRowPerTrans = atoi(zArg); |
| 184 | } |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 185 | else if( strcmp(zOpt, "-idx")==0 ){ |
| 186 | bMap = atoi(zArg); |
| 187 | if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); |
| 188 | } |
| 189 | else if( strcmp(zOpt, "-dir")==0 ){ |
| 190 | zDir = zArg; |
| 191 | } |
dan | 6885bbc | 2014-08-18 19:30:01 +0000 | [diff] [blame] | 192 | else if( strcmp(zOpt, "-special")==0 ){ |
| 193 | nCmd++; |
| 194 | aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd); |
| 195 | aCmd[nCmd-1] = zArg; |
| 196 | } |
| 197 | else{ |
| 198 | showHelp(argv[0]); |
| 199 | } |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 200 | } |
| 201 | |
| 202 | /* Open the database file */ |
| 203 | rc = sqlite3_open(argv[argc-1], &db); |
| 204 | if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db); |
| 205 | |
| 206 | rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0, |
| 207 | readfileFunc, 0, 0); |
| 208 | if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db); |
| 209 | |
| 210 | /* Create the FTS table */ |
| 211 | zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts); |
| 212 | rc = sqlite3_exec(db, zSql, 0, 0, 0); |
| 213 | if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); |
| 214 | sqlite3_free(zSql); |
| 215 | |
dan | 6885bbc | 2014-08-18 19:30:01 +0000 | [diff] [blame] | 216 | for(i=0; i<nCmd; i++){ |
| 217 | zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]); |
| 218 | rc = sqlite3_exec(db, zSql, 0, 0, 0); |
| 219 | if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); |
| 220 | sqlite3_free(zSql); |
| 221 | } |
| 222 | |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 223 | /* Compile the INSERT statement to write data to the FTS table. */ |
| 224 | memset(&sCtx, 0, sizeof(VisitContext)); |
| 225 | sCtx.db = db; |
dan | c7fe7a9 | 2014-08-11 19:44:52 +0000 | [diff] [blame] | 226 | sCtx.nRowPerTrans = nRowPerTrans; |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 227 | rc = sqlite3_prepare_v2(db, |
| 228 | "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 |
| 229 | ); |
| 230 | if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db); |
| 231 | |
| 232 | /* Load all files in the directory hierarchy into the FTS table. */ |
dan | c7fe7a9 | 2014-08-11 19:44:52 +0000 | [diff] [blame] | 233 | if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0); |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 234 | traverse(zDir, (void*)&sCtx, visit_file); |
dan | c7fe7a9 | 2014-08-11 19:44:52 +0000 | [diff] [blame] | 235 | if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0); |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 236 | |
| 237 | /* Clean up and exit. */ |
| 238 | sqlite3_finalize(sCtx.pInsert); |
| 239 | sqlite3_close(db); |
dan | 6885bbc | 2014-08-18 19:30:01 +0000 | [diff] [blame] | 240 | sqlite3_free(aCmd); |
dan | 92e497e | 2014-07-28 20:14:02 +0000 | [diff] [blame] | 241 | return 0; |
| 242 | } |