blob: 3e08d3ffa74adf211d6e6406e4b889a01cf9f959 [file] [log] [blame]
# Run this TCL script using an SQLite-enabled TCL interpreter to get a report
# on how much disk space is used by a particular database to actually store
# data versus how much space is unused.
4#
drhd59fde32017-10-31 14:56:44 +00005# The dbstat virtual table is required.
6#
drh3e27c022004-07-23 00:01:38 +00007
drha7531c62006-01-24 02:19:53 +00008if {[catch {
dan9fab5ed2015-02-09 17:46:11 +00009
# Report whether table $tname, in the database opened as handle [db], is a
# WITHOUT ROWID table.
#
# The proc walks "PRAGMA index_list" for the table looking for an index whose
# origin is "pk".  If such an index has no row in sqlite_master the proc
# returns 1; in every other case it returns 0.
#
proc is_without_rowid {tname} {
  # Double any single quote so the name can sit inside an SQL string literal.
  set escaped [string map {' ''} $tname]
  db eval "PRAGMA index_list = '$escaped'" idx {
    if {$idx(origin) != "pk"} continue
    set n $idx(name)
    set hits [db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]
    if {$hits == 0} {
      return 1
    }
  }
  return 0
}
26
# Minimal interactive read-eval-print loop on standard input.  Used to
# implement the --tclsh option.
#
# The prompt is "% " for a fresh command and "> " while a multi-line command
# is still being accumulated.  Each complete command is evaluated at global
# level; an error message goes to stderr and a non-empty result to stdout.
#
proc tclsh {} {
  set pending {}
  while {![eof stdin]} {
    puts -nonewline [expr {$pending!="" ? "> " : "% "}]
    flush stdout
    append pending [gets stdin]
    if {![info complete $pending]} {
      # Command is not finished yet: keep the line break and read more.
      append pending \n
      continue
    }
    if {[catch {uplevel #0 $pending} result]} {
      puts stderr "Error: $result"
    } elseif {$result!=""} {
      puts $result
    }
    set pending {}
  }
}
52
53
# Get the name of the database to analyze
#
# usage: write the command-line synopsis and the option summary to stderr,
# then terminate the process with exit status 1.  The program name shown is
# taken from the running executable with its directory and extension removed.
#
# NOTE(review): runs of spaces inside the help text appear collapsed in this
# copy of the file; the text is reproduced as found -- confirm the intended
# column alignment.
#
proc usage {} {
  set exe [info nameofexecutable]
  set argv0 [file rootname [file tail $exe]]
  puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename"
  puts stderr {
Analyze the SQLite3 database file specified by the "database-filename"
argument and output a report detailing size and storage efficiency
information for the database and its constituent tables and indexes.

Options:

 --pageinfo Show how each page of the database-file is used

 --stats Output SQL text that creates a new database containing
 statistics about the database that was analyzed

 --tclsh Run the built-in TCL interpreter interactively (for debugging)

 --version Show the version number of SQLite
}
  exit 1
}
# Parse the command line.  An option may be spelled with one or more leading
# dashes.  --tclsh and --version act immediately and exit.  Any other
# dash-prefixed argument, a second non-option argument, or a missing database
# name ends in a call to [usage].
set file_to_analyze {}
array set flags {-pageinfo 0 -stats 0 -debug 0}
# Appending nothing creates $argv as an empty value if it does not exist yet.
append argv {}
foreach arg $argv {
  switch -regexp -- $arg {
    {^-+pageinfo$} {
      set flags(-pageinfo) 1
    }
    {^-+stats$} {
      set flags(-stats) 1
    }
    {^-+debug$} {
      set flags(-debug) 1
    }
    {^-+tclsh$} {
      tclsh
      exit 0
    }
    {^-+version$} {
      sqlite3 mem :memory:
      puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}]
      mem close
      exit 0
    }
    {^-} {
      puts stderr "Unknown option: $arg"
      usage
    }
    default {
      if {$file_to_analyze!=""} {
        usage
      } else {
        set file_to_analyze $arg
      }
    }
  }
}
if {$file_to_analyze==""} usage
# Work out the on-disk path.  For a "file:" URI (optionally followed by "//")
# the path is everything up to the first "?"; otherwise the argument is used
# unchanged.  Then refuse to continue unless the file exists, is readable and
# is at least 512 bytes long.
set root_filename $file_to_analyze
regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename
set problem {}
if {![file exists $root_filename]} {
  set problem "No such file: $root_filename"
} elseif {![file readable $root_filename]} {
  set problem "File is not readable: $root_filename"
} else {
  set true_file_size [file size $root_filename]
  if {$true_file_size<512} {
    set problem "Empty or malformed database: $root_filename"
  }
}
if {$problem ne ""} {
  puts stderr $problem
  exit 1
}
122
# Compute the total file size assuming test_multiplexor is being used.
# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
#
# First add the size of every file named <root_filename>NNN, where NNN is
# three digits with a leading digit of 0-3.  If none exist and the database
# file has an extension of 2 to 4 characters (dot included), also add every
# file named <name-without-extension>.NNN.
#
set extension [file extension $root_filename]
set pattern "${root_filename}\[0-3\]\[0-9\]\[0-9\]"
set chunks [glob -nocomplain $pattern]
foreach f $chunks {
  incr true_file_size [file size $f]
}
if {[llength $chunks]>0} {
  # Suffix-style chunks were found; skip the extension-style search below.
  set extension {}
}
set extlen [string length $extension]
if {$extlen>=2 && $extlen<=4} {
  set pattern "[file rootname $root_filename].\[0-3\]\[0-9\]\[0-9\]"
  foreach f [glob -nocomplain $pattern] {
    incr true_file_size [file size $f]
  }
}
140
# Open the database
#
# The handle is named [db] and URI filename processing is enabled.  With
# --debug, every traced SQL statement is echoed to stdout.
#
set rc [catch {sqlite3 db $file_to_analyze -uri 1} msg]
if {$rc} {
  puts stderr "error trying to open $file_to_analyze: $msg"
  exit 1
}
if {$flags(-debug)} {
  proc dbtrace {txt} {
    puts $txt
    flush stdout
  }
  db trace ::dbtrace
}
dan599e9d22010-07-12 08:39:37 +0000151
# Make sure all required compile-time options are available
#
# The report depends on the dbstat virtual table, so stop with exit status 1
# when pragma_compile_options does not list ENABLE_DBSTAT_VTAB.  The message
# is written to stderr, like every other fatal diagnostic in this script,
# so it cannot be mistaken for report output on stdout.
#
if {![db exists {SELECT 1 FROM pragma_compile_options
                 WHERE compile_options='ENABLE_DBSTAT_VTAB'}]} {
  puts stderr "The SQLite database engine linked with this application\
        lacks required capabilities. Recompile using the\
        -DSQLITE_ENABLE_DBSTAT_VTAB compile-time option to fix\
        this problem."
  exit 1
}
162
# Run a trivial query against the schema table, then record the page size as
# a wide integer.
# NOTE(review): the count(*) result is discarded; presumably the query is
# there so an unreadable database fails at this point -- confirm.
db eval {SELECT count(*) FROM sqlite_master}
set pageSize [expr {wide([db one {PRAGMA page_size}])}]

# --pageinfo: print one "pageno name path" line per dbstat row, in page
# order, and exit.
if {$flags(-pageinfo)} {
  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
  db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
    puts "$pageno $name $path"
  }
  exit 0
}

# --stats: print an SQL script that rebuilds the dbstat content as an
# ordinary table named "stats", and exit.
#
# The textual columns are declared TEXT.  A declared type of STRING gets
# NUMERIC affinity in SQLite, which would convert numeric-looking object
# names to numbers when the script is loaded; "path" holds text, not an
# integer.
if {$flags(-stats)} {
  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
  puts "BEGIN;"
  puts "CREATE TABLE stats("
  puts " name TEXT, /* Name of table or index */"
  puts " path TEXT, /* Path to page from root */"
  puts " pageno INTEGER, /* Page number */"
  puts " pagetype TEXT, /* 'internal', 'leaf' or 'overflow' */"
  puts " ncell INTEGER, /* Cells on page (0 for overflow) */"
  puts " payload INTEGER, /* Bytes of payload on this page */"
  puts " unused INTEGER, /* Bytes of unused space on this page */"
  puts " mx_payload INTEGER, /* Largest payload size of all cells */"
  puts " pgoffset INTEGER, /* Offset of page in file */"
  puts " pgsize INTEGER /* Size of the page */"
  puts ");"
  # quote() renders each value as an SQL literal, so $x can be pasted
  # directly into the VALUES list.
  db eval {SELECT quote(name) || ',' ||
                  quote(path) || ',' ||
                  quote(pageno) || ',' ||
                  quote(pagetype) || ',' ||
                  quote(ncell) || ',' ||
                  quote(payload) || ',' ||
                  quote(unused) || ',' ||
                  quote(mx_payload) || ',' ||
                  quote(pgoffset) || ',' ||
                  quote(pgsize) AS x FROM stat} {
    puts "INSERT INTO stats VALUES($x);"
  }
  puts "COMMIT;"
  exit 0
}
203
drh43269742016-10-12 18:26:26 +0000204
# In-memory database for collecting statistics. This script loops through
# the tables and indices in the database being analyzed, adding a row for each
# to an in-memory database (for which the schema is shown below). It then
# queries the in-memory db to produce the space-analysis report.
#
# The handle is named [mem].  With --debug its SQL is traced to stdout in the
# same way as for [db].
#
sqlite3 mem :memory:
if {$flags(-debug)} {
  proc dbtrace {txt} {
    puts $txt
    flush stdout
  }
  mem trace ::dbtrace
}
set tabledef {CREATE TABLE space_used(
   name clob, -- Name of a table or index in the database file
   tblname clob, -- Name of associated table
   is_index boolean, -- TRUE if it is an index, false for a table
   is_without_rowid boolean, -- TRUE if WITHOUT ROWID table
   nentry int, -- Number of entries in the BTree
   leaf_entries int, -- Number of leaf entries
   depth int, -- Depth of the b-tree
   payload int, -- Total amount of data stored in this table or index
   ovfl_payload int, -- Total amount of data stored on overflow pages
   ovfl_cnt int, -- Number of entries that use overflow
   mx_payload int, -- Maximum payload size
   int_pages int, -- Number of interior pages used
   leaf_pages int, -- Number of leaf pages used
   ovfl_pages int, -- Number of overflow pages used
   int_unused int, -- Number of unused bytes on interior pages
   leaf_unused int, -- Number of unused bytes on primary pages
   ovfl_unused int, -- Number of unused bytes on overflow pages
   gap_cnt int, -- Number of gaps in the page layout
   compressed_size int -- Total bytes stored on disk
);}
mem eval $tabledef
237
# Create a temporary "dbstat" virtual table.
#
# Its rows are copied, ordered by name and path, into the ordinary temp table
# temp.dbstat, and the virtual table is dropped again.  All later queries
# read the copy.
#
db eval {
  CREATE VIRTUAL TABLE temp.stat USING dbstat;
  CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
      ORDER BY name, path;
  DROP TABLE temp.stat;
}
dan599e9d22010-07-12 08:39:37 +0000244
# Fill mem.space_used with one row per b-tree: sqlite_master itself plus every
# sqlite_master entry that has a root page.
#
# isCompressed / compressOverhead are switched on the first time a b-tree's
# summed pgsize is smaller than (page count * page size).
# NOTE(review): the meaning of the constant 14 is not visible here; it is
# later applied as a per-page byte overhead -- confirm its origin.
set isCompressed 0
set compressOverhead 0
set depth 0
set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {

  # An entry is an index when its name differs from its table's name.  Use
  # "ne" so that two numeric-looking names (e.g. "16" and "0x10") are compared
  # as text rather than as numbers.
  set is_index [expr {$name ne $tblname}]
  set is_without_rowid [is_without_rowid $name]

  # Aggregate the dbstat rows of this b-tree.  The query always yields one
  # row; "break" leaves the result columns behind as Tcl variables.
  db eval {
    SELECT
      sum(ncell) AS nentry,
      sum((pagetype=='leaf')*ncell) AS leaf_entries,
      sum(payload) AS payload,
      sum((pagetype=='overflow') * payload) AS ovfl_payload,
      sum(path LIKE '%+000000') AS ovfl_cnt,
      max(mx_payload) AS mx_payload,
      sum(pagetype=='internal') AS int_pages,
      sum(pagetype=='leaf') AS leaf_pages,
      sum(pagetype=='overflow') AS ovfl_pages,
      sum((pagetype=='internal') * unused) AS int_unused,
      sum((pagetype=='leaf') * unused) AS leaf_unused,
      sum((pagetype=='overflow') * unused) AS ovfl_unused,
      sum(pgsize) AS compressed_size,
      max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4)
        AS depth
    FROM temp.dbstat WHERE name = $name
  } break

  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
  set storage [expr {$total_pages*$pageSize}]
  if {!$isCompressed && $storage>$compressed_size} {
    set isCompressed 1
    set compressOverhead 14
  }

  # Column 'gap_cnt' counts, while walking this b-tree's pages in ascending
  # page-number order, the leaf pages whose page number is not exactly one
  # more than the page visited just before them.  The first page visited is
  # never counted.
  #
  set gap_cnt 0
  set prev 0
  db eval {
    SELECT pageno, pagetype FROM temp.dbstat
     WHERE name=$name
     ORDER BY pageno
  } {
    if {$prev>0 && $pagetype eq "leaf" && $pageno!=$prev+1} {
      incr gap_cnt
    }
    set prev $pageno
  }
  mem eval {
    INSERT INTO space_used VALUES(
      $name,
      $tblname,
      $is_index,
      $is_without_rowid,
      $nentry,
      $leaf_entries,
      $depth,
      $payload,
      $ovfl_payload,
      $ovfl_cnt,
      $mx_payload,
      $int_pages,
      $leaf_pages,
      $ovfl_pages,
      $int_unused,
      $leaf_unused,
      $ovfl_unused,
      $gap_cnt,
      $compressed_size
    );
  }
}
322
# Convert a numeric value to a wide integer.  Anything that is not a valid
# floating-point string (including the empty string) becomes 0.
proc integerify {real} {
  if {![string is double -strict $real]} {
    return 0
  }
  return [expr {wide($real)}]
}
# Expose the integerify proc to SQL run on the [mem] handle as the
# function int().
mem function int integerify
331
# Escape a string for use inside a single-quoted SQL string literal by
# doubling every single quote.  The surrounding quotes are NOT added; the
# caller supplies them.  Examples:
#
#     [quote {hello world}]   == {hello world}
#     [quote {hello world's}] == {hello world''s}
#
proc quote {txt} {
  # Splitting on ' and re-joining with '' doubles each quote character.
  return [join [split $txt '] '']
}
340
# Output a title line
#
# With an empty title a row of 79 asterisks is printed.  Otherwise the line is
# "*** <title> " followed by (79 - 5 - title length) asterisks.
#
proc titleline {title} {
  if {$title eq ""} {
    puts [string repeat * 79]
    return
  }
  set fill [expr {79 - [string length $title] - 5}]
  puts "*** $title [string repeat * $fill]"
}
352
# Generate a single line of output in the statistics section of the
# report.
#
# The title is followed by dots up to column 50, a space and the value.  When
# $extra is non-empty it is appended after the value, separated by a space.
#
# NOTE(review): the padding literal below appears as a single space in this
# copy of the file; runs of spaces may have been collapsed -- confirm the
# intended width.
#
proc statline {title value {extra {}}} {
  set dots [string repeat . [expr {50 - [string length $title]}]]
  # Whatever remains of the padding literal after skipping one character per
  # character of the value.
  set pad [string range { } [string length $value] end]
  if {$extra ne ""} {
    set extra " $extra"
  }
  puts "$title$dots $value$pad$extra"
}
366
# Generate a formatted percentage value for $num/$denom
#
# Returns the empty string when $denom is zero.  The number of decimals
# depends on the magnitude: one decimal for exactly 100, for values below
# 0.001 and for values strictly between 1 and 99; three decimals for other
# values below 0.1 or above 99.9; two decimals otherwise.
#
# The $of argument is accepted but always cleared before formatting, so the
# trailing "%s" expands to an empty string.
#
proc percent {num denom {of {}}} {
  if {$denom==0.0} {return ""}
  set pct [expr {$num*100.0/$denom}]
  set of {}
  if {$pct==100.0 || $pct<0.001 || ($pct>1.0 && $pct<99.0)} {
    set fmt {%5.1f%% %s}
  } elseif {$pct<0.1 || $pct>99.9} {
    set fmt {%7.3f%% %s}
  } else {
    set fmt {%6.2f%% %s}
  }
  return [format $fmt $pct $of]
}
381
# Return $num/$denom as a floating-point quotient formatted with two
# decimals, or 0.0 when $denom is zero.
proc divide {num denom} {
  if {$denom==0} {return 0.0}
  set quotient [expr {double($num) / double($denom)}]
  return [format %.2f $quotient]
}
386
# Generate a subreport that covers some subset of the database.
# The $where clause determines which subset to analyze.
#
# Arguments:
#   title     Heading printed (via titleline) above the subreport.
#   where     SQL boolean expression pasted verbatim into the WHERE clause of
#             queries on space_used.  It is not escaped here, so callers must
#             pass trusted text only.
#   showFrag  When true, and the subset spans more than one page, the
#             "Non-sequential pages" line is included.
#
# Reads the globals pageSize, file_pgcnt and compressOverhead.  Output goes
# to stdout through titleline/statline.  Always returns 1.
#
proc subreport {title where showFrag} {
  global pageSize file_pgcnt compressOverhead

  # Query the in-memory database for the sum of various statistics
  # for the subset of tables/indices identified by the WHERE clause in
  # $where. Note that even if the WHERE clause matches no rows, the
  # following query returns exactly one row (because it is an aggregate).
  #
  # The results of the query are stored directly by SQLite into local
  # variables (i.e. $nentry, $payload etc.).
  #
  # int() is the SQL function registered on [mem] from the integerify proc.
  #
  mem eval "
    SELECT
      int(sum(
        CASE WHEN (is_without_rowid OR is_index) THEN nentry
             ELSE leaf_entries
        END
      )) AS nentry,
      int(sum(payload)) AS payload,
      int(sum(ovfl_payload)) AS ovfl_payload,
      max(mx_payload) AS mx_payload,
      int(sum(ovfl_cnt)) as ovfl_cnt,
      int(sum(leaf_pages)) AS leaf_pages,
      int(sum(int_pages)) AS int_pages,
      int(sum(ovfl_pages)) AS ovfl_pages,
      int(sum(leaf_unused)) AS leaf_unused,
      int(sum(int_unused)) AS int_unused,
      int(sum(ovfl_unused)) AS ovfl_unused,
      int(sum(gap_cnt)) AS gap_cnt,
      int(sum(compressed_size)) AS compressed_size,
      int(max(depth)) AS depth,
      count(*) AS cnt
    FROM space_used WHERE $where" {} {}

  # Output the sub-report title, nicely decorated with * characters.
  #
  puts ""
  titleline $title
  puts ""

  # Calculate statistics and store the results in TCL variables, as follows:
  #
  # total_pages: Database pages consumed.
  # total_pages_percent: Pages consumed as a percentage of the file.
  # storage: Bytes consumed.
  # payload_percent: Payload bytes used as a percentage of $storage.
  # total_unused: Unused bytes on pages.
  # avg_payload: Average payload per btree entry.
  # avg_fanout: Average fanout for internal pages.
  # avg_unused: Average unused bytes per btree entry.
  # avg_meta: Average metadata overhead per entry.
  # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
  #
  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
  set total_pages_percent [percent $total_pages $file_pgcnt]
  set storage [expr {$total_pages*$pageSize}]
  set payload_percent [percent $payload $storage {of storage consumed}]
  set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
  set avg_payload [divide $payload $nentry]
  set avg_unused [divide $total_unused $nentry]
  # Metadata is whatever is neither payload nor unused space, adjusted by
  # 4 bytes for each overflow page beyond one per overflowing entry.
  # NOTE(review): the rationale for the 4-byte adjustment is not visible
  # here -- confirm.
  set total_meta [expr {$storage - $payload - $total_unused}]
  set total_meta [expr {$total_meta + 4*($ovfl_pages - $ovfl_cnt)}]
  set meta_percent [percent $total_meta $storage {of metadata}]
  set avg_meta [divide $total_meta $nentry]
  if {$int_pages>0} {
    # TODO: Is this formula correct?
    # nTab is the number of distinct tables in the subset.
    set nTab [mem eval "
      SELECT count(*) FROM (
          SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
      )
    "]
    # NOTE(review): the division is evaluated in SQL; if the summed columns
    # hold integers the quotient is truncated before %.2f is applied --
    # confirm whether a fractional fanout is wanted.
    set avg_fanout [mem eval "
      SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
          WHERE $where
    "]
    set avg_fanout [format %.2f $avg_fanout]
  }
  set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}]

  # Print out the sub-report statistics.
  #
  statline {Percentage of total database} $total_pages_percent
  statline {Number of entries} $nentry
  statline {Bytes of storage consumed} $storage
  if {$compressed_size!=$storage} {
    # Add the per-page overhead to the summed on-disk page sizes.
    set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
    set pct [expr {$compressed_size*100.0/$storage}]
    set pct [format {%5.1f%%} $pct]
    statline {Bytes used after compression} $compressed_size $pct
  }
  statline {Bytes of payload} $payload $payload_percent
  statline {Bytes of metadata} $total_meta $meta_percent
  # Depth is only shown when the subset is a single b-tree.
  if {$cnt==1} {statline {B-tree depth} $depth}
  statline {Average payload per entry} $avg_payload
  statline {Average unused bytes per entry} $avg_unused
  statline {Average metadata per entry} $avg_meta
  # avg_fanout exists only when the subset has interior pages.
  if {[info exists avg_fanout]} {
    statline {Average fanout} $avg_fanout
  }
  if {$showFrag && $total_pages>1} {
    set fragmentation [percent $gap_cnt [expr {$total_pages-1}]]
    statline {Non-sequential pages} $gap_cnt $fragmentation
  }
  statline {Maximum payload per entry} $mx_payload
  statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
  if {$int_pages>0} {
    statline {Index pages used} $int_pages
  }
  statline {Primary pages used} $leaf_pages
  statline {Overflow pages used} $ovfl_pages
  statline {Total pages used} $total_pages
  if {$int_unused>0} {
    set int_unused_percent [
         percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
    statline "Unused bytes on index pages" $int_unused $int_unused_percent
  }
  statline "Unused bytes on primary pages" $leaf_unused [
     percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
  statline "Unused bytes on overflow pages" $ovfl_unused [
     percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
  statline "Unused bytes on all pages" $total_unused [
     percent $total_unused $storage {of all space}]
  return 1
}
514
# Calculate the overhead in pages caused by auto-vacuum.
#
# Returns the number of pages used by the auto-vacuum 'pointer-map'.  The
# result is 0 when "PRAGMA auto_vacuum" on [db] reports 0 or when the file
# consists of a single page.
#
# Arguments:
#   filePages  Size of the database file in pages.
#   pageSize   Page size used by the database, in bytes.
#
proc autovacuum_overhead {filePages pageSize} {
  set autovacuum [db one {PRAGMA auto_vacuum}]
  if {$autovacuum!=0 && $filePages!=1} {
    # Each pointer-map page holds pageSize/5 entries (integer division).  The
    # file layout is one pointer-map page followed by that many other pages,
    # then another pointer-map page, and so on; the first pointer-map page is
    # the second page of the file.
    set ptrsPerPage [expr {double($pageSize/5)}]
    return [expr {wide(ceil(($filePages-1.0)/($ptrsPerPage+1.0)))}]
  }
  return 0
}
541
danielk197716254452004-11-08 16:15:09 +0000542
danielk19770ba87cb2004-11-09 07:42:11 +0000543# Calculate the summary statistics for the database and store the results
544# in TCL variables. They are output below. Variables are as follows:
danielk197716254452004-11-08 16:15:09 +0000545#
546# pageSize: Size of each page in bytes.
547# file_bytes: File size in bytes.
548# file_pgcnt: Number of pages in the file.
549# file_pgcnt2: Number of pages in the file (calculated).
550# av_pgcnt: Pages consumed by the auto-vacuum pointer-map.
551# av_percent: Percentage of the file consumed by auto-vacuum pointer-map.
552# inuse_pgcnt: Data pages in the file.
553# inuse_percent: Percentage of pages used to store data.
554# free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>)
555# free_pgcnt2: Free pages in the file according to the file header.
556# free_percent: Percentage of file consumed by free pages (calculated).
557# free_percent2: Percentage of file consumed by free pages (header).
558# ntable: Number of tables in the db.
559# nindex: Number of indices in the db.
560# nautoindex: Number of indices created automatically.
561# nmanindex: Number of indices created manually.
danielk19770ba87cb2004-11-09 07:42:11 +0000562# user_payload: Number of bytes of payload in table btrees
563# (not including sqlite_master)
564# user_percent: $user_payload as a percentage of total file size.
danielk197716254452004-11-08 16:15:09 +0000565
### The following, setting $file_bytes based on the actual size of the file
### on disk, causes this tool to choke on zipvfs databases. So set it based
### on the return of [PRAGMA page_count] instead.
if 0 {
  set file_bytes [file size $file_to_analyze]
  set file_pgcnt [expr {$file_bytes/$pageSize}]
}

# Page counts: whole file, pointer-map overhead, pages in use, free pages
# (derived by subtraction and as reported by PRAGMA freelist_count), and the
# file size recomputed from those parts.
set file_pgcnt [db one {PRAGMA page_count}]
set file_bytes [expr {$file_pgcnt * $pageSize}]
set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize]
set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
set inuse_pgcnt [expr {wide([mem eval $sql])}]
set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
set free_pgcnt2 [db one {PRAGMA freelist_count}]
set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]

# Each page count expressed as a percentage of the pages in the file.
set av_percent [percent $av_pgcnt $file_pgcnt]
set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
set free_percent [percent $free_pgcnt $file_pgcnt]
set free_percent2 [percent $free_pgcnt2 $file_pgcnt]

# Object counts.  ntable is one more than the number of 'table' rows in
# sqlite_master.  Indices whose names match sqlite_autoindex% are counted
# separately from the rest.
set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
set nautoindex [db eval $sql]
set nmanindex [expr {$nindex-$nautoindex}]

# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
# Payload bytes held in table b-trees other than sqlite_master.
set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
     WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
set user_percent [percent $user_payload $file_bytes]
danielk197716254452004-11-08 16:15:09 +0000600
# Output the summary statistics calculated above.
#
# The fixed part of the summary is driven from a {title value extra} table.
# An empty extra is the same as omitting statline's optional argument.
#
puts "/** Disk-Space Utilization Report For $root_filename"
puts ""
foreach {sl_title sl_value sl_extra} [list \
    {Page size in bytes}                   $pageSize    {} \
    {Pages in the whole file (measured)}   $file_pgcnt  {} \
    {Pages in the whole file (calculated)} $file_pgcnt2 {} \
    {Pages that store data}                $inuse_pgcnt $inuse_percent \
    {Pages on the freelist (per header)}   $free_pgcnt2 $free_percent2 \
    {Pages on the freelist (calculated)}   $free_pgcnt  $free_percent \
    {Pages of auto-vacuum overhead}        $av_pgcnt    $av_percent \
    {Number of tables in the database}     $ntable      {} \
    {Number of indices}                    $nindex      {} \
    {Number of defined indices}            $nmanindex   {} \
    {Number of implied indices}            $nautoindex  {} \
] {
  statline $sl_title $sl_value $sl_extra
}
if {!$isCompressed} {
  statline {Size of the file in bytes} $file_bytes
} else {
  statline {Size of uncompressed content in bytes} $file_bytes
  set efficiency [percent $true_file_size $file_bytes]
  statline {Size of compressed file on disk} $true_file_size $efficiency
}
statline {Bytes of user payload stored} $user_payload $user_percent
drh3e27c022004-07-23 00:01:38 +0000624
# Output table rankings
#
# Three listings, each sorted by descending size and then by name: pages per
# table including its indices, pages per individual b-tree, and (for
# compressed databases only) on-disk bytes per table including its indices.
#
puts ""
titleline "Page counts for all tables with their indices"
puts ""
foreach {tblname cnt size} [mem eval {SELECT tblname, count(*) AS cnt,
              int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
          FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname}] {
  statline [string toupper $tblname] $size [percent $size $file_pgcnt]
}
puts ""
titleline "Page counts for all tables and indices separately"
puts ""
foreach {nm size} [mem eval {
  SELECT
       upper(name) AS nm,
       int(int_pages+leaf_pages+ovfl_pages) AS size
    FROM space_used
   ORDER BY size+0 DESC, name}] {
  statline $nm $size [percent $size $file_pgcnt]
}
if {$isCompressed} {
  puts ""
  titleline "Bytes of disk space used after compression"
  puts ""
  # csum accumulates the per-table compressed sizes so that the remainder of
  # the file can be reported as header and free space.
  set csum 0
  foreach {tblname csize} [mem eval {SELECT tblname,
                  int(sum(compressed_size)) +
                   $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
                        AS csize
          FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname}] {
    incr csum $csize
    statline [string toupper $tblname] $csize [percent $csize $true_file_size]
  }
  set overhead [expr {$true_file_size - $csum}]
  if {$overhead>0} {
    statline {Header and free space} $overhead [percent $overhead $true_file_size]
  }
}
drh3e27c022004-07-23 00:01:38 +0000664
# Output subreports
#
# First the database-wide subreports (the combined and index-only ones only
# when at least one index exists), then one group per table.  A table with no
# other b-tree gets a single subreport.  Otherwise the group is: the table
# with its indices, the table alone, all its indices together (only when
# there is more than one), and each index alone.
#
if {$nindex>0} {
  subreport {All tables and indices} 1 0
}
subreport {All tables} {NOT is_index} 0
if {$nindex>0} {
  subreport {All indices} {is_index} 0
}
set tblnames [mem eval {SELECT DISTINCT tblname name FROM space_used
                             ORDER BY name}]
foreach tbl $tblnames {
  # $qn is the table name with single quotes doubled, ready to be placed
  # between quotes in a WHERE clause.
  set qn [quote $tbl]
  set name [string toupper $tbl]
  set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}]
  if {$n<=1} {
    subreport "Table $name" "name='$qn'" 1
    continue
  }
  set idxlist [mem eval "SELECT name FROM space_used
                          WHERE tblname='$qn' AND is_index
                          ORDER BY 1"]
  subreport "Table $name and all its indices" "tblname='$qn'" 0
  subreport "Table $name w/o any indices" "name='$qn'" 1
  if {[llength $idxlist]>1} {
    subreport "Indices of table $name" "tblname='$qn' AND is_index" 0
  }
  foreach idx $idxlist {
    set qidx [quote $idx]
    subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1
  }
}
696
# Output instructions on what the numbers above mean.
#
# The text is emitted in three pieces because the middle sentence
# interpolates $pageSize and therefore needs a double-quoted string, while
# the rest is literal text in braces.  Everything printed here is still
# inside the SQL comment that wraps the report, so the text must never
# contain a comment terminator.
#
puts ""
titleline Definitions
puts {
Page size in bytes

    The number of bytes in a single page of the database file.
    Always a power of two between 512 and 65536.  Typically 4096, which
    has been the default since SQLite 3.12.0 (older versions used 1024).

Number of pages in the whole file
}
puts "    The number of $pageSize-byte pages that go into forming the complete
    database"
puts {
Pages that store data

    The number of pages that store data, either as primary B*Tree pages or
    as overflow pages. The number at the right is the data pages divided by
    the total number of pages in the file.

Pages on the freelist

    The number of pages that are not currently in use but are reserved for
    future use. The percentage at the right is the number of freelist pages
    divided by the total number of pages in the file.

Pages of auto-vacuum overhead

    The number of pages that store data used by the database to facilitate
    auto-vacuum. This is zero for databases that do not support auto-vacuum.

Number of tables in the database

    The number of tables in the database, including the SQLITE_MASTER table
    used to store schema information.

Number of indices

    The total number of indices in the database.

Number of defined indices

    The number of indices created using an explicit CREATE INDEX statement.

Number of implied indices

    The number of indices used to implement PRIMARY KEY or UNIQUE constraints
    on tables.

Size of the file in bytes

    The total amount of disk space used by the entire database file.

Bytes of user payload stored

    The total number of bytes of user payload stored in the database. The
    schema information in the SQLITE_MASTER table is not counted when
    computing this number. The percentage at the right shows the payload
    divided by the total file size.

Percentage of total database

    The amount of the complete database file that is devoted to storing
    information described by this category.

Number of entries

    The total number of B-Tree key/value pairs stored under this category.

Bytes of storage consumed

    The total amount of disk space required to store all B-Tree entries
    under this category. This is the total number of pages used times
    the page size.

Bytes of payload

    The amount of payload stored under this category. Payload is the data
    part of table entries and the key part of index entries. The percentage
    at the right is the bytes of payload divided by the bytes of storage
    consumed.

Bytes of metadata

    The amount of formatting and structural information stored in the
    table or index. Metadata includes the btree page header, the cell pointer
    array, the size field for each cell, the left child pointer for non-leaf
    cells, the overflow pointers for overflow cells, and the rowid value for
    rowid table cells. In other words, metadata is everything that is neither
    unused space nor content. The record header in the payload is counted as
    content, not metadata.

Average payload per entry

    The average amount of payload on each entry. This is just the bytes of
    payload divided by the number of entries.

Average unused bytes per entry

    The average amount of free space remaining on all pages under this
    category on a per-entry basis. This is the number of unused bytes on
    all pages divided by the number of entries.

Non-sequential pages

    The number of pages in the table or index that are out of sequence.
    Many filesystems are optimized for sequential file access so a small
    number of non-sequential pages might result in faster queries,
    especially for larger database files that do not fit in the disk cache.
    Note that after running VACUUM, the root page of each table or index is
    at the beginning of the database file and all other pages are in a
    separate part of the database file, resulting in a single non-
    sequential page.

Maximum payload per entry

    The largest payload size of any entry.

Entries that use overflow

    The number of entries that use one or more overflow pages.

Total pages used

    This is the number of pages used to hold all information in the current
    category. This is the sum of index, primary, and overflow pages.

Index pages used

    This is the number of pages in a table B-tree that hold only key (rowid)
    information and no data.

Primary pages used

    This is the number of B-tree pages that hold both key and data.

Overflow pages used

    The total number of overflow pages used for this category.

Unused bytes on index pages

    The total number of bytes of unused space on all index pages. The
    percentage at the right is the number of unused bytes divided by the
    total number of bytes on index pages.

Unused bytes on primary pages

    The total number of bytes of unused space on all primary pages. The
    percentage at the right is the number of unused bytes divided by the
    total number of bytes on primary pages.

Unused bytes on overflow pages

    The total number of bytes of unused space on all overflow pages. The
    percentage at the right is the number of unused bytes divided by the
    total number of bytes on overflow pages.

Unused bytes on all pages

    The total number of bytes of unused space on all primary and overflow
    pages. The percentage at the right is the number of unused bytes
    divided by the total number of bytes.
}
862
# Output a dump of the in-memory database. This can be used for more
# complex offline analysis.
#
# The "*/" line closes the SQL comment that encloses the report text, so
# everything printed after it is executable SQL: the table definition
# followed by one INSERT per row of space_used, wrapped in a transaction.
#
titleline {}
puts "The entire text of this report can be sourced into any SQL database"
puts "engine for further analysis. All of the text above is an SQL comment."
puts "The data used to generate this report follows:"
puts "*/"
puts "BEGIN;"
puts $tabledef

# Let SQLite render every value as an SQL literal with quote() instead of
# guessing from the Tcl string whether it is numeric.  A [string is double]
# test also accepts text such as "nan", "inf", "1e5", "0x1F" or " 12", so a
# table or index with such a name used to be written unquoted, producing
# invalid or silently altered SQL.  ifnull(...,'') keeps the previous
# output for NULL values, which were written as ''.
unset -nocomplain x
set dumpcols {}
mem eval {PRAGMA table_info(space_used)} x {
  # Double any embedded double-quote so the column name is a valid quoted
  # identifier.
  set qcol [string map [list \" \"\"] $x(name)]
  lappend dumpcols "quote(ifnull(\"$qcol\",'')) AS \"$qcol\""
}
unset -nocomplain x
mem eval "SELECT [join $dumpcols ,] FROM space_used" x {
  # $x(*) lists the result columns in order; each value is already a
  # complete SQL literal.
  set vals {}
  foreach col $x(*) {
    lappend vals $x($col)
  }
  puts "INSERT INTO space_used VALUES([join $vals ,]);"
}
puts "COMMIT;"
drha7531c62006-01-24 02:19:53 +0000886
887} err]} {
888 puts "ERROR: $err"
889 puts $errorInfo
890 exit 1
891}