blob: b1c72b3e36f2268b66075ee2a291b97b239a530a [file] [log] [blame]
drh290fcaa2016-06-08 01:03:05 +00001/*
2** 2016-06-07
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12**
drh7c4942c2016-06-08 13:49:28 +000013** This is a utility program that computes an SHA1 hash on the content
drh290fcaa2016-06-08 01:03:05 +000014** of an SQLite database.
15**
16** The hash is computed over just the content of the database. Free
17** space inside of the database file, and alternative on-disk representations
18** of the same content (ex: UTF8 vs UTF16) do not affect the hash. So,
19** for example, the database file page size, encoding, and auto_vacuum setting
20** can all be changed without changing the hash.
21*/
22#include <stdio.h>
23#include <stdlib.h>
24#include <stdarg.h>
25#include <ctype.h>
26#include <string.h>
27#include <assert.h>
28#include "sqlite3.h"
29
30/* Context for the SHA1 hash */
31typedef struct SHA1Context SHA1Context;
32struct SHA1Context {
33 unsigned int state[5];
34 unsigned int count[2];
35 unsigned char buffer[64];
36};
37
38/*
39** All global variables are gathered into the "g" singleton.
40*/
41struct GlobalVars {
42 const char *zArgv0; /* Name of program */
drh290fcaa2016-06-08 01:03:05 +000043 unsigned fDebug; /* Debug flags */
44 sqlite3 *db; /* The database connection */
45 SHA1Context cx; /* SHA1 hash context */
46} g;
47
drh7c4942c2016-06-08 13:49:28 +000048/*
49** Debugging flags
50*/
51#define DEBUG_FULLTRACE 0x00000001 /* Trace hash to stderr */
52
drh290fcaa2016-06-08 01:03:05 +000053/******************************************************************************
54** The Hash Engine
55**
56** Modify these routines (and appropriate state fields in global variable 'g')
57** in order to compute a different (better?) hash of the database.
58*/
59/*
60 * blk0() and blk() perform the initial expand.
61 * I got the idea of expanding during the round function from SSLeay
62 *
63 * blk0le() for little-endian and blk0be() for big-endian.
64 */
65#if __GNUC__ && (defined(__i386__) || defined(__x86_64__))
66/*
67 * GCC by itself only generates left rotates. Use right rotates if
68 * possible to be kinder to dinky implementations with iterative rotate
69 * instructions.
70 */
71#define SHA_ROT(op, x, k) \
72 ({ unsigned int y; asm(op " %1,%0" : "=r" (y) : "I" (k), "0" (x)); y; })
73#define rol(x,k) SHA_ROT("roll", x, k)
74#define ror(x,k) SHA_ROT("rorl", x, k)
75
76#else
77/* Generic C equivalent */
78#define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r))
79#define rol(x,k) SHA_ROT(x,k,32-(k))
80#define ror(x,k) SHA_ROT(x,32-(k),k)
81#endif
82
83
84#define blk0le(i) (block[i] = (ror(block[i],8)&0xFF00FF00) \
85 |(rol(block[i],8)&0x00FF00FF))
86#define blk0be(i) block[i]
87#define blk(i) (block[i&15] = rol(block[(i+13)&15]^block[(i+8)&15] \
88 ^block[(i+2)&15]^block[i&15],1))
89
90/*
91 * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1
92 *
93 * Rl0() for little-endian and Rb0() for big-endian. Endianness is
94 * determined at run-time.
95 */
96#define Rl0(v,w,x,y,z,i) \
97 z+=((w&(x^y))^y)+blk0le(i)+0x5A827999+rol(v,5);w=ror(w,2);
98#define Rb0(v,w,x,y,z,i) \
99 z+=((w&(x^y))^y)+blk0be(i)+0x5A827999+rol(v,5);w=ror(w,2);
100#define R1(v,w,x,y,z,i) \
101 z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=ror(w,2);
102#define R2(v,w,x,y,z,i) \
103 z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=ror(w,2);
104#define R3(v,w,x,y,z,i) \
105 z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=ror(w,2);
106#define R4(v,w,x,y,z,i) \
107 z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=ror(w,2);
108
109/*
110 * Hash a single 512-bit block. This is the core of the algorithm.
111 */
112#define a qq[0]
113#define b qq[1]
114#define c qq[2]
115#define d qq[3]
116#define e qq[4]
117
118void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){
119 unsigned int qq[5]; /* a, b, c, d, e; */
120 static int one = 1;
121 unsigned int block[16];
122 memcpy(block, buffer, 64);
123 memcpy(qq,state,5*sizeof(unsigned int));
124
125 /* Copy g.cx.state[] to working vars */
126 /*
127 a = state[0];
128 b = state[1];
129 c = state[2];
130 d = state[3];
131 e = state[4];
132 */
133
134 /* 4 rounds of 20 operations each. Loop unrolled. */
135 if( 1 == *(unsigned char*)&one ){
136 Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3);
137 Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7);
138 Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11);
139 Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15);
140 }else{
141 Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3);
142 Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7);
143 Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11);
144 Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15);
145 }
146 R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
147 R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
148 R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
149 R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
150 R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
151 R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
152 R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
153 R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
154 R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
155 R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
156 R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
157 R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
158 R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
159 R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
160 R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
161 R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
162
163 /* Add the working vars back into context.state[] */
164 state[0] += a;
165 state[1] += b;
166 state[2] += c;
167 state[3] += d;
168 state[4] += e;
169}
170
171
172/* Initialize the SHA1 hash */
173static void hash_init(void){
174 /* SHA1 initialization constants */
175 g.cx.state[0] = 0x67452301;
176 g.cx.state[1] = 0xEFCDAB89;
177 g.cx.state[2] = 0x98BADCFE;
178 g.cx.state[3] = 0x10325476;
179 g.cx.state[4] = 0xC3D2E1F0;
180 g.cx.count[0] = g.cx.count[1] = 0;
181}
182
183/* Add new content to the SHA1 hash */
184static void hash_step(const unsigned char *data, unsigned int len){
185 unsigned int i, j;
186
187 j = g.cx.count[0];
188 if( (g.cx.count[0] += len << 3) < j ){
189 g.cx.count[1] += (len>>29)+1;
190 }
191 j = (j >> 3) & 63;
192 if( (j + len) > 63 ){
193 (void)memcpy(&g.cx.buffer[j], data, (i = 64-j));
194 SHA1Transform(g.cx.state, g.cx.buffer);
195 for(; i + 63 < len; i += 64){
196 SHA1Transform(g.cx.state, &data[i]);
197 }
198 j = 0;
199 }else{
200 i = 0;
201 }
202 (void)memcpy(&g.cx.buffer[j], &data[i], len - i);
203}
204
205
206/* Add padding and compute and output the message digest. */
drh7c4942c2016-06-08 13:49:28 +0000207static void hash_finish(const char *zName){
drh290fcaa2016-06-08 01:03:05 +0000208 unsigned int i;
209 unsigned char finalcount[8];
210 unsigned char digest[20];
211 static const char zEncode[] = "0123456789abcdef";
drh1d5721a2016-06-08 13:59:35 +0000212 char zOut[41];
drh290fcaa2016-06-08 01:03:05 +0000213
214 for (i = 0; i < 8; i++){
215 finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)]
216 >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
217 }
218 hash_step((const unsigned char *)"\200", 1);
219 while ((g.cx.count[0] & 504) != 448){
220 hash_step((const unsigned char *)"\0", 1);
221 }
222 hash_step(finalcount, 8); /* Should cause a SHA1Transform() */
223 for (i = 0; i < 20; i++){
224 digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
225 }
226 for(i=0; i<20; i++){
227 zOut[i*2] = zEncode[(digest[i]>>4)&0xf];
228 zOut[i*2+1] = zEncode[digest[i] & 0xf];
229 }
230 zOut[i*2]= 0;
drh7c4942c2016-06-08 13:49:28 +0000231 printf("%s %s\n", zOut, zName);
drh290fcaa2016-06-08 01:03:05 +0000232}
233/* End of the hashing logic
234*******************************************************************************/
235
236/*
237** Print an error resulting from faulting command-line arguments and
238** abort the program.
239*/
240static void cmdlineError(const char *zFormat, ...){
241 va_list ap;
242 fprintf(stderr, "%s: ", g.zArgv0);
243 va_start(ap, zFormat);
244 vfprintf(stderr, zFormat, ap);
245 va_end(ap);
246 fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0);
247 exit(1);
248}
249
250/*
251** Print an error message for an error that occurs at runtime, then
252** abort the program.
253*/
254static void runtimeError(const char *zFormat, ...){
255 va_list ap;
256 fprintf(stderr, "%s: ", g.zArgv0);
257 va_start(ap, zFormat);
258 vfprintf(stderr, zFormat, ap);
259 va_end(ap);
260 fprintf(stderr, "\n");
261 exit(1);
262}
263
264/*
265** Prepare a new SQL statement. Print an error and abort if anything
266** goes wrong.
267*/
268static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){
269 char *zSql;
270 int rc;
271 sqlite3_stmt *pStmt;
272
273 zSql = sqlite3_vmprintf(zFormat, ap);
274 if( zSql==0 ) runtimeError("out of memory");
275 rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0);
276 if( rc ){
277 runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db),
278 zSql);
279 }
280 sqlite3_free(zSql);
281 return pStmt;
282}
283static sqlite3_stmt *db_prepare(const char *zFormat, ...){
284 va_list ap;
285 sqlite3_stmt *pStmt;
286 va_start(ap, zFormat);
287 pStmt = db_vprepare(zFormat, ap);
288 va_end(ap);
289 return pStmt;
290}
291
292/*
drh7c4942c2016-06-08 13:49:28 +0000293** Compute the hash for all rows of the query formed from the printf-style
294** zFormat and its argument.
drh290fcaa2016-06-08 01:03:05 +0000295*/
drh7c4942c2016-06-08 13:49:28 +0000296static void hash_one_query(const char *zFormat, ...){
297 va_list ap;
298 sqlite3_stmt *pStmt; /* The query defined by zFormat and "..." */
299 int nCol; /* Number of columns in the result set */
300 int i; /* Loop counter */
301
302 /* Prepare the query defined by zFormat and "..." */
303 va_start(ap, zFormat);
304 pStmt = db_vprepare(zFormat, ap);
305 va_end(ap);
drh290fcaa2016-06-08 01:03:05 +0000306 nCol = sqlite3_column_count(pStmt);
drh7c4942c2016-06-08 13:49:28 +0000307
308 /* Compute a hash over the result of the query */
drh290fcaa2016-06-08 01:03:05 +0000309 while( SQLITE_ROW==sqlite3_step(pStmt) ){
310 for(i=0; i<nCol; i++){
311 switch( sqlite3_column_type(pStmt,i) ){
312 case SQLITE_NULL: {
313 hash_step((const unsigned char*)"0",1);
drh7c4942c2016-06-08 13:49:28 +0000314 if( g.fDebug & DEBUG_FULLTRACE ) fprintf(stderr, "NULL\n");
drh290fcaa2016-06-08 01:03:05 +0000315 break;
316 }
317 case SQLITE_INTEGER: {
318 sqlite3_uint64 u;
319 int j;
320 unsigned char x[8];
321 sqlite3_int64 v = sqlite3_column_int64(pStmt,i);
322 memcpy(&u, &v, 8);
323 for(j=7; j>=0; j--){
324 x[j] = u & 0xff;
325 u >>= 8;
326 }
327 hash_step((const unsigned char*)"1",1);
328 hash_step(x,8);
drh7c4942c2016-06-08 13:49:28 +0000329 if( g.fDebug & DEBUG_FULLTRACE ){
330 fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i));
331 }
drh290fcaa2016-06-08 01:03:05 +0000332 break;
333 }
334 case SQLITE_FLOAT: {
335 sqlite3_uint64 u;
336 int j;
337 unsigned char x[8];
338 double r = sqlite3_column_double(pStmt,i);
339 memcpy(&u, &r, 8);
340 for(j=7; j>=0; j--){
341 x[j] = u & 0xff;
342 u >>= 8;
343 }
344 hash_step((const unsigned char*)"2",1);
345 hash_step(x,8);
drh7c4942c2016-06-08 13:49:28 +0000346 if( g.fDebug & DEBUG_FULLTRACE ){
347 fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i));
348 }
drh290fcaa2016-06-08 01:03:05 +0000349 break;
350 }
351 case SQLITE_TEXT: {
352 int n = sqlite3_column_bytes(pStmt, i);
353 const unsigned char *z = sqlite3_column_text(pStmt, i);
354 hash_step((const unsigned char*)"3", 1);
355 hash_step(z, n);
drh7c4942c2016-06-08 13:49:28 +0000356 if( g.fDebug & DEBUG_FULLTRACE ){
357 fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i));
358 }
drh290fcaa2016-06-08 01:03:05 +0000359 break;
360 }
361 case SQLITE_BLOB: {
362 int n = sqlite3_column_bytes(pStmt, i);
363 const unsigned char *z = sqlite3_column_blob(pStmt, i);
364 hash_step((const unsigned char*)"4", 1);
365 hash_step(z, n);
drh7c4942c2016-06-08 13:49:28 +0000366 if( g.fDebug & DEBUG_FULLTRACE ){
367 fprintf(stderr, "BLOB (%d bytes)\n", n);
368 }
drh290fcaa2016-06-08 01:03:05 +0000369 break;
370 }
371 }
372 }
373 }
374 sqlite3_finalize(pStmt);
375}
376
377
378/*
379** Print sketchy documentation for this utility program
380*/
381static void showHelp(void){
drh7c4942c2016-06-08 13:49:28 +0000382 printf("Usage: %s [options] FILE ...\n", g.zArgv0);
drh290fcaa2016-06-08 01:03:05 +0000383 printf(
drh7c4942c2016-06-08 13:49:28 +0000384"Compute a SHA1 hash on the content of database FILE. System tables such as\n"
385"sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n"
386"Options:\n"
387" --debug N Set debugging flags to N (experts only)\n"
388" --like PATTERN Only hash tables whose name is LIKE the pattern\n"
389" --schema-only Only hash the schema - omit table content\n"
390" --without-schema Only hash table content - omit the schema\n"
drh290fcaa2016-06-08 01:03:05 +0000391 );
392}
393
394int main(int argc, char **argv){
drh7c4942c2016-06-08 13:49:28 +0000395 const char *zDb = 0; /* Name of the database currently being hashed */
396 int i; /* Loop counter */
397 int rc; /* Subroutine return code */
398 char *zErrMsg; /* Error message when opening database */
399 sqlite3_stmt *pStmt; /* An SQLite query */
400 const char *zLike = 0; /* LIKE pattern of tables to hash */
401 int omitSchema = 0; /* True to compute hash on content only */
402 int omitContent = 0; /* True to compute hash on schema only */
403 int nFile = 0; /* Number of input filenames seen */
drh290fcaa2016-06-08 01:03:05 +0000404
405 g.zArgv0 = argv[0];
406 sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
407 for(i=1; i<argc; i++){
408 const char *z = argv[i];
409 if( z[0]=='-' ){
410 z++;
411 if( z[0]=='-' ) z++;
412 if( strcmp(z,"debug")==0 ){
413 if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
414 g.fDebug = strtol(argv[++i], 0, 0);
415 }else
416 if( strcmp(z,"help")==0 ){
417 showHelp();
418 return 0;
419 }else
drh7c4942c2016-06-08 13:49:28 +0000420 if( strcmp(z,"like")==0 ){
421 if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
422 if( zLike!=0 ) cmdlineError("only one --like allowed");
423 zLike = argv[++i];
424 }else
425 if( strcmp(z,"schema-only")==0 ){
426 omitContent = 1;
427 }else
428 if( strcmp(z,"without-schema")==0 ){
429 omitSchema = 1;
drh290fcaa2016-06-08 01:03:05 +0000430 }else
431 {
432 cmdlineError("unknown option: %s", argv[i]);
433 }
drh290fcaa2016-06-08 01:03:05 +0000434 }else{
drh7c4942c2016-06-08 13:49:28 +0000435 nFile++;
436 if( nFile<i ) argv[nFile] = argv[i];
drh290fcaa2016-06-08 01:03:05 +0000437 }
438 }
drh7c4942c2016-06-08 13:49:28 +0000439 if( nFile==0 ){
440 cmdlineError("no input files specified - nothing to do");
drh290fcaa2016-06-08 01:03:05 +0000441 }
drh7c4942c2016-06-08 13:49:28 +0000442 if( omitSchema && omitContent ){
443 cmdlineError("only one of --without-schema and --omit-schema allowed");
drh290fcaa2016-06-08 01:03:05 +0000444 }
drh7c4942c2016-06-08 13:49:28 +0000445 if( zLike==0 ) zLike = "%";
drh290fcaa2016-06-08 01:03:05 +0000446
drh7c4942c2016-06-08 13:49:28 +0000447 for(i=1; i<=nFile; i++){
448 static const int openFlags =
449 SQLITE_OPEN_READWRITE | /* Read/write so hot journals can recover */
450 SQLITE_OPEN_URI
451 ;
452 zDb = argv[i];
453 rc = sqlite3_open_v2(zDb, &g.db, openFlags, 0);
454 if( rc ){
455 fprintf(stderr, "cannot open database file '%s'\n", zDb);
456 continue;
457 }
458 rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, &zErrMsg);
459 if( rc || zErrMsg ){
460 sqlite3_close(g.db);
461 g.db = 0;
462 fprintf(stderr, "'%s' is not a valid SQLite database\n", zDb);
463 continue;
464 }
drh290fcaa2016-06-08 01:03:05 +0000465
drh7c4942c2016-06-08 13:49:28 +0000466 /* Start the hash */
467 hash_init();
468
469 /* Hash table content */
470 if( !omitContent ){
471 pStmt = db_prepare(
472 "SELECT name FROM sqlite_master\n"
473 " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
474 " AND name NOT LIKE 'sqlite_%%'\n"
475 " AND name LIKE '%q'\n"
476 " ORDER BY name COLLATE nocase;\n",
477 zLike
478 );
479 while( SQLITE_ROW==sqlite3_step(pStmt) ){
480 /* We want rows of the table to be hashed in PRIMARY KEY order.
481 ** Technically, an ORDER BY clause is required to guarantee that
482 ** order. However, though not guaranteed by the documentation, every
483 ** historical version of SQLite has always output rows in PRIMARY KEY
484 ** order when there is no WHERE or GROUP BY clause, so the ORDER BY
485 ** can be safely omitted. */
486 hash_one_query("SELECT * FROM \"%w\"", sqlite3_column_text(pStmt,0));
487 }
488 sqlite3_finalize(pStmt);
489 }
490
491 /* Hash the database schema */
492 if( !omitSchema ){
493 hash_one_query(
494 "SELECT type, name, tbl_name, sql FROM sqlite_master\n"
495 " WHERE tbl_name LIKE '%q'\n"
496 " ORDER BY name COLLATE nocase;\n",
497 zLike
498 );
499 }
500
501 /* Finish and output the hash and close the database connection. */
502 hash_finish(zDb);
503 sqlite3_close(g.db);
504 }
drh290fcaa2016-06-08 01:03:05 +0000505 return 0;
506}