blob: 38d954162e438f86b9505dd602ad478e41549a4b [file] [log] [blame]
drh3e27c022004-07-23 00:01:38 +00001# Run this TCL script using "testfixture" in order get a report that shows
2# how much disk space is used by a particular data to actually store data
3# versus how much space is unused.
4#
5
drha7531c62006-01-24 02:19:53 +00006if {[catch {
dan9fab5ed2015-02-09 17:46:11 +00007
8# Argument $tname is the name of a table within the database opened by
9# database handle [db]. Return true if it is a WITHOUT ROWID table, or
10# false otherwise.
11#
12proc is_without_rowid {tname} {
13 set t [string map {' ''} $tname]
14 db eval "PRAGMA index_list = '$t'" o {
15 if {$o(origin) == "pk"} {
16 set n $o(name)
17 if {0==[db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]} {
18 return 1
19 }
20 }
21 }
22 return 0
23}
24
drh3e27c022004-07-23 00:01:38 +000025# Get the name of the database to analyze
26#
drhb7708942011-10-05 18:18:13 +000027proc usage {} {
28 set argv0 [file rootname [file tail [info nameofexecutable]]]
drh310a8d62015-09-08 17:31:30 +000029 puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename"
30 puts stderr {
31Analyze the SQLite3 database file specified by the "database-filename"
32argument and output a report detailing size and storage efficiency
33information for the database and its constituent tables and indexes.
34
35Options:
36
37 --stats Output SQL text that creates a new database containing
38 statistics about the database that was analyzed
39
40 --pageinfo Show how each page of the database-file is used
41}
drh3e27c022004-07-23 00:01:38 +000042 exit 1
43}
drhb7708942011-10-05 18:18:13 +000044set file_to_analyze {}
45set flags(-pageinfo) 0
drha624fd52011-10-05 19:46:03 +000046set flags(-stats) 0
drhb7708942011-10-05 18:18:13 +000047append argv {}
48foreach arg $argv {
49 if {[regexp {^-+pageinfo$} $arg]} {
50 set flags(-pageinfo) 1
drha624fd52011-10-05 19:46:03 +000051 } elseif {[regexp {^-+stats$} $arg]} {
52 set flags(-stats) 1
drhb7708942011-10-05 18:18:13 +000053 } elseif {[regexp {^-} $arg]} {
54 puts stderr "Unknown option: $arg"
55 usage
56 } elseif {$file_to_analyze!=""} {
57 usage
58 } else {
59 set file_to_analyze $arg
60 }
61}
62if {$file_to_analyze==""} usage
drh453a3122012-10-10 10:52:46 +000063set root_filename $file_to_analyze
64regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename
65if {![file exists $root_filename]} {
66 puts stderr "No such file: $root_filename"
drh3e27c022004-07-23 00:01:38 +000067 exit 1
68}
drh453a3122012-10-10 10:52:46 +000069if {![file readable $root_filename]} {
70 puts stderr "File is not readable: $root_filename"
drh3e27c022004-07-23 00:01:38 +000071 exit 1
72}
drh453a3122012-10-10 10:52:46 +000073set true_file_size [file size $root_filename]
drhb7708942011-10-05 18:18:13 +000074if {$true_file_size<512} {
drh453a3122012-10-10 10:52:46 +000075 puts stderr "Empty or malformed database: $root_filename"
drh3e27c022004-07-23 00:01:38 +000076 exit 1
77}
78
drh36c06322011-10-10 16:06:35 +000079# Compute the total file size assuming test_multiplexor is being used.
80# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
81#
drh453a3122012-10-10 10:52:46 +000082set extension [file extension $root_filename]
83set pattern $root_filename
drh37002622012-04-06 00:09:27 +000084append pattern {[0-3][0-9][0-9]}
drh36c06322011-10-10 16:06:35 +000085foreach f [glob -nocomplain $pattern] {
86 incr true_file_size [file size $f]
87 set extension {}
88}
89if {[string length $extension]>=2 && [string length $extension]<=4} {
drh453a3122012-10-10 10:52:46 +000090 set pattern [file rootname $root_filename]
drh37002622012-04-06 00:09:27 +000091 append pattern {.[0-3][0-9][0-9]}
drh36c06322011-10-10 16:06:35 +000092 foreach f [glob -nocomplain $pattern] {
93 incr true_file_size [file size $f]
94 }
95}
96
drh3e27c022004-07-23 00:01:38 +000097# Open the database
98#
drh453a3122012-10-10 10:52:46 +000099if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} {
100 puts stderr "error trying to open $file_to_analyze: $msg"
101 exit 1
102}
dan599e9d22010-07-12 08:39:37 +0000103
drh565621a2011-09-21 20:10:42 +0000104db eval {SELECT count(*) FROM sqlite_master}
drhf08f3842011-09-27 13:40:26 +0000105set pageSize [expr {wide([db one {PRAGMA page_size}])}]
drh3e27c022004-07-23 00:01:38 +0000106
drha624fd52011-10-05 19:46:03 +0000107if {$flags(-pageinfo)} {
108 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
109 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
110 puts "$pageno $name $path"
111 }
112 exit 0
113}
114if {$flags(-stats)} {
115 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
116 puts "BEGIN;"
117 puts "CREATE TABLE stats("
118 puts " name STRING, /* Name of table or index */"
119 puts " path INTEGER, /* Path to page from root */"
120 puts " pageno INTEGER, /* Page number */"
121 puts " pagetype STRING, /* 'internal', 'leaf' or 'overflow' */"
122 puts " ncell INTEGER, /* Cells on page (0 for overflow) */"
123 puts " payload INTEGER, /* Bytes of payload on this page */"
124 puts " unused INTEGER, /* Bytes of unused space on this page */"
125 puts " mx_payload INTEGER, /* Largest payload size of all cells */"
126 puts " pgoffset INTEGER, /* Offset of page in file */"
127 puts " pgsize INTEGER /* Size of the page */"
128 puts ");"
129 db eval {SELECT quote(name) || ',' ||
130 quote(path) || ',' ||
131 quote(pageno) || ',' ||
132 quote(pagetype) || ',' ||
133 quote(ncell) || ',' ||
134 quote(payload) || ',' ||
135 quote(unused) || ',' ||
136 quote(mx_payload) || ',' ||
137 quote(pgoffset) || ',' ||
138 quote(pgsize) AS x FROM stat} {
139 puts "INSERT INTO stats VALUES($x);"
140 }
141 puts "COMMIT;"
142 exit 0
143}
144
danielk19770ba87cb2004-11-09 07:42:11 +0000145# In-memory database for collecting statistics. This script loops through
146# the tables and indices in the database being analyzed, adding a row for each
147# to an in-memory database (for which the schema is shown below). It then
148# queries the in-memory db to produce the space-analysis report.
drh3e27c022004-07-23 00:01:38 +0000149#
150sqlite3 mem :memory:
drh4515a452011-08-31 17:46:50 +0000151set tabledef {CREATE TABLE space_used(
drh3e27c022004-07-23 00:01:38 +0000152 name clob, -- Name of a table or index in the database file
153 tblname clob, -- Name of associated table
154 is_index boolean, -- TRUE if it is an index, false for a table
155 nentry int, -- Number of entries in the BTree
156 leaf_entries int, -- Number of leaf entries
drh8fb6c432015-08-04 14:18:10 +0000157 depth int, -- Depth of the b-tree
drh3e27c022004-07-23 00:01:38 +0000158 payload int, -- Total amount of data stored in this table or index
159 ovfl_payload int, -- Total amount of data stored on overflow pages
160 ovfl_cnt int, -- Number of entries that use overflow
161 mx_payload int, -- Maximum payload size
162 int_pages int, -- Number of interior pages used
163 leaf_pages int, -- Number of leaf pages used
164 ovfl_pages int, -- Number of overflow pages used
165 int_unused int, -- Number of unused bytes on interior pages
166 leaf_unused int, -- Number of unused bytes on primary pages
drh50c67062007-02-10 19:22:35 +0000167 ovfl_unused int, -- Number of unused bytes on overflow pages
drh4c9f1292011-09-28 00:50:14 +0000168 gap_cnt int, -- Number of gaps in the page layout
169 compressed_size int -- Total bytes stored on disk
drh3e27c022004-07-23 00:01:38 +0000170);}
171mem eval $tabledef
172
dan599e9d22010-07-12 08:39:37 +0000173# Create a temporary "dbstat" virtual table.
174#
drh565621a2011-09-21 20:10:42 +0000175db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
176db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
177 ORDER BY name, path}
178db eval {DROP TABLE temp.stat}
dan599e9d22010-07-12 08:39:37 +0000179
drhb7708942011-10-05 18:18:13 +0000180set isCompressed 0
181set compressOverhead 0
drh8fb6c432015-08-04 14:18:10 +0000182set depth 0
dan599e9d22010-07-12 08:39:37 +0000183set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
184foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {
185
186 set is_index [expr {$name!=$tblname}]
dan9fab5ed2015-02-09 17:46:11 +0000187 set idx_btree [expr {$is_index || [is_without_rowid $name]}]
dan599e9d22010-07-12 08:39:37 +0000188 db eval {
189 SELECT
190 sum(ncell) AS nentry,
drh8fb6c432015-08-04 14:18:10 +0000191 sum((pagetype=='leaf')*ncell) AS leaf_entries,
dan599e9d22010-07-12 08:39:37 +0000192 sum(payload) AS payload,
drh8fb6c432015-08-04 14:18:10 +0000193 sum((pagetype=='overflow') * payload) AS ovfl_payload,
dan599e9d22010-07-12 08:39:37 +0000194 sum(path LIKE '%+000000') AS ovfl_cnt,
195 max(mx_payload) AS mx_payload,
drh8fb6c432015-08-04 14:18:10 +0000196 sum(pagetype=='internal') AS int_pages,
197 sum(pagetype=='leaf') AS leaf_pages,
198 sum(pagetype=='overflow') AS ovfl_pages,
199 sum((pagetype=='internal') * unused) AS int_unused,
200 sum((pagetype=='leaf') * unused) AS leaf_unused,
201 sum((pagetype=='overflow') * unused) AS ovfl_unused,
202 sum(pgsize) AS compressed_size,
203 max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4)
204 AS depth
dan599e9d22010-07-12 08:39:37 +0000205 FROM temp.dbstat WHERE name = $name
206 } break
207
drhb7708942011-10-05 18:18:13 +0000208 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
209 set storage [expr {$total_pages*$pageSize}]
210 if {!$isCompressed && $storage>$compressed_size} {
211 set isCompressed 1
212 set compressOverhead 14
213 }
214
dan599e9d22010-07-12 08:39:37 +0000215 # Column 'gap_cnt' is set to the number of non-contiguous entries in the
216 # list of pages visited if the b-tree structure is traversed in a top-down
217 # fashion (each node visited before its child-tree is passed). Any overflow
218 # chains present are traversed from start to finish before any child-tree
219 # is.
220 #
221 set gap_cnt 0
drh2f312ee2013-09-28 12:40:55 +0000222 set prev 0
223 db eval {
224 SELECT pageno, pagetype FROM temp.dbstat
225 WHERE name=$name
226 ORDER BY pageno
227 } {
228 if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} {
229 incr gap_cnt
230 }
231 set prev $pageno
dan599e9d22010-07-12 08:39:37 +0000232 }
dan599e9d22010-07-12 08:39:37 +0000233 mem eval {
234 INSERT INTO space_used VALUES(
235 $name,
236 $tblname,
237 $is_index,
238 $nentry,
239 $leaf_entries,
drh8fb6c432015-08-04 14:18:10 +0000240 $depth,
dan599e9d22010-07-12 08:39:37 +0000241 $payload,
242 $ovfl_payload,
243 $ovfl_cnt,
244 $mx_payload,
245 $int_pages,
246 $leaf_pages,
247 $ovfl_pages,
248 $int_unused,
249 $leaf_unused,
250 $ovfl_unused,
drh4c9f1292011-09-28 00:50:14 +0000251 $gap_cnt,
252 $compressed_size
dan599e9d22010-07-12 08:39:37 +0000253 );
254 }
255}
256
danielk197724c92532005-02-01 10:36:40 +0000257proc integerify {real} {
drh03496882007-12-04 13:41:51 +0000258 if {[string is double -strict $real]} {
drhf08f3842011-09-27 13:40:26 +0000259 return [expr {wide($real)}]
drh03496882007-12-04 13:41:51 +0000260 } else {
261 return 0
262 }
danielk197724c92532005-02-01 10:36:40 +0000263}
264mem function int integerify
265
danielk19770ba87cb2004-11-09 07:42:11 +0000266# Quote a string for use in an SQL query. Examples:
drh3e27c022004-07-23 00:01:38 +0000267#
danielk19770ba87cb2004-11-09 07:42:11 +0000268# [quote {hello world}] == {'hello world'}
269# [quote {hello world's}] == {'hello world''s'}
drh3e27c022004-07-23 00:01:38 +0000270#
danielk19770ba87cb2004-11-09 07:42:11 +0000271proc quote {txt} {
drhcc071302013-07-17 18:12:15 +0000272 return [string map {' ''} $txt]
273}
274
275# Output a title line
276#
277proc titleline {title} {
278 if {$title==""} {
279 puts [string repeat * 79]
280 } else {
281 set len [string length $title]
282 set stars [string repeat * [expr 79-$len-5]]
283 puts "*** $title $stars"
284 }
drh3e27c022004-07-23 00:01:38 +0000285}
286
drh3e27c022004-07-23 00:01:38 +0000287# Generate a single line of output in the statistics section of the
288# report.
289#
290proc statline {title value {extra {}}} {
291 set len [string length $title]
drhcc071302013-07-17 18:12:15 +0000292 set dots [string repeat . [expr 50-$len]]
drh3e27c022004-07-23 00:01:38 +0000293 set len [string length $value]
294 set sp2 [string range { } $len end]
295 if {$extra ne ""} {
296 set extra " $extra"
297 }
298 puts "$title$dots $value$sp2$extra"
299}
300
301# Generate a formatted percentage value for $num/$denom
302#
303proc percent {num denom {of {}}} {
304 if {$denom==0.0} {return ""}
305 set v [expr {$num*100.0/$denom}]
306 set of {}
drhfaf60c72005-03-29 13:18:16 +0000307 if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
drh3e27c022004-07-23 00:01:38 +0000308 return [format {%5.1f%% %s} $v $of]
309 } elseif {$v<0.1 || $v>99.9} {
310 return [format {%7.3f%% %s} $v $of]
311 } else {
312 return [format {%6.2f%% %s} $v $of]
313 }
314}
315
danielk197724c92532005-02-01 10:36:40 +0000316proc divide {num denom} {
317 if {$denom==0} {return 0.0}
318 return [format %.2f [expr double($num)/double($denom)]]
319}
320
drh3e27c022004-07-23 00:01:38 +0000321# Generate a subreport that covers some subset of the database.
322# the $where clause determines which subset to analyze.
323#
drh2f312ee2013-09-28 12:40:55 +0000324proc subreport {title where showFrag} {
drhb7708942011-10-05 18:18:13 +0000325 global pageSize file_pgcnt compressOverhead
danielk19770ba87cb2004-11-09 07:42:11 +0000326
327 # Query the in-memory database for the sum of various statistics
328 # for the subset of tables/indices identified by the WHERE clause in
329 # $where. Note that even if the WHERE clause matches no rows, the
330 # following query returns exactly one row (because it is an aggregate).
331 #
332 # The results of the query are stored directly by SQLite into local
333 # variables (i.e. $nentry, $nleaf etc.).
334 #
drh3e27c022004-07-23 00:01:38 +0000335 mem eval "
336 SELECT
danielk197724c92532005-02-01 10:36:40 +0000337 int(sum(nentry)) AS nentry,
338 int(sum(leaf_entries)) AS nleaf,
339 int(sum(payload)) AS payload,
340 int(sum(ovfl_payload)) AS ovfl_payload,
drh3e27c022004-07-23 00:01:38 +0000341 max(mx_payload) AS mx_payload,
danielk197724c92532005-02-01 10:36:40 +0000342 int(sum(ovfl_cnt)) as ovfl_cnt,
343 int(sum(leaf_pages)) AS leaf_pages,
344 int(sum(int_pages)) AS int_pages,
345 int(sum(ovfl_pages)) AS ovfl_pages,
346 int(sum(leaf_unused)) AS leaf_unused,
347 int(sum(int_unused)) AS int_unused,
drh50c67062007-02-10 19:22:35 +0000348 int(sum(ovfl_unused)) AS ovfl_unused,
drh4c9f1292011-09-28 00:50:14 +0000349 int(sum(gap_cnt)) AS gap_cnt,
drh8fb6c432015-08-04 14:18:10 +0000350 int(sum(compressed_size)) AS compressed_size,
351 int(max(depth)) AS depth,
352 count(*) AS cnt
danielk19770ba87cb2004-11-09 07:42:11 +0000353 FROM space_used WHERE $where" {} {}
354
355 # Output the sub-report title, nicely decorated with * characters.
356 #
drh3e27c022004-07-23 00:01:38 +0000357 puts ""
drhcc071302013-07-17 18:12:15 +0000358 titleline $title
drh3e27c022004-07-23 00:01:38 +0000359 puts ""
danielk19770ba87cb2004-11-09 07:42:11 +0000360
361 # Calculate statistics and store the results in TCL variables, as follows:
362 #
363 # total_pages: Database pages consumed.
364 # total_pages_percent: Pages consumed as a percentage of the file.
365 # storage: Bytes consumed.
366 # payload_percent: Payload bytes used as a percentage of $storage.
367 # total_unused: Unused bytes on pages.
368 # avg_payload: Average payload per btree entry.
369 # avg_fanout: Average fanout for internal pages.
370 # avg_unused: Average unused bytes per btree entry.
371 # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
372 #
drh3e27c022004-07-23 00:01:38 +0000373 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
danielk19770ba87cb2004-11-09 07:42:11 +0000374 set total_pages_percent [percent $total_pages $file_pgcnt]
drh3e27c022004-07-23 00:01:38 +0000375 set storage [expr {$total_pages*$pageSize}]
danielk19770ba87cb2004-11-09 07:42:11 +0000376 set payload_percent [percent $payload $storage {of storage consumed}]
377 set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
danielk197724c92532005-02-01 10:36:40 +0000378 set avg_payload [divide $payload $nleaf]
379 set avg_unused [divide $total_unused $nleaf]
drh3e27c022004-07-23 00:01:38 +0000380 if {$int_pages>0} {
danielk19770ba87cb2004-11-09 07:42:11 +0000381 # TODO: Is this formula correct?
danielk197724c92532005-02-01 10:36:40 +0000382 set nTab [mem eval "
383 SELECT count(*) FROM (
384 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
385 )
386 "]
387 set avg_fanout [mem eval "
388 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
drh8fb6c432015-08-04 14:18:10 +0000389 WHERE $where
danielk197724c92532005-02-01 10:36:40 +0000390 "]
391 set avg_fanout [format %.2f $avg_fanout]
drh3e27c022004-07-23 00:01:38 +0000392 }
danielk19770ba87cb2004-11-09 07:42:11 +0000393 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}]
394
395 # Print out the sub-report statistics.
396 #
397 statline {Percentage of total database} $total_pages_percent
398 statline {Number of entries} $nleaf
399 statline {Bytes of storage consumed} $storage
drh4c9f1292011-09-28 00:50:14 +0000400 if {$compressed_size!=$storage} {
drhb7708942011-10-05 18:18:13 +0000401 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
drh4c9f1292011-09-28 00:50:14 +0000402 set pct [expr {$compressed_size*100.0/$storage}]
403 set pct [format {%5.1f%%} $pct]
404 statline {Bytes used after compression} $compressed_size $pct
405 }
danielk19770ba87cb2004-11-09 07:42:11 +0000406 statline {Bytes of payload} $payload $payload_percent
drh8fb6c432015-08-04 14:18:10 +0000407 if {$cnt==1} {statline {B-tree depth} $depth}
danielk19770ba87cb2004-11-09 07:42:11 +0000408 statline {Average payload per entry} $avg_payload
409 statline {Average unused bytes per entry} $avg_unused
410 if {[info exists avg_fanout]} {
411 statline {Average fanout} $avg_fanout
412 }
drh2f312ee2013-09-28 12:40:55 +0000413 if {$showFrag && $total_pages>1} {
414 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]]
415 statline {Non-sequential pages} $gap_cnt $fragmentation
drh50c67062007-02-10 19:22:35 +0000416 }
danielk19770ba87cb2004-11-09 07:42:11 +0000417 statline {Maximum payload per entry} $mx_payload
418 statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
drh3e27c022004-07-23 00:01:38 +0000419 if {$int_pages>0} {
danielk19770ba87cb2004-11-09 07:42:11 +0000420 statline {Index pages used} $int_pages
drh3e27c022004-07-23 00:01:38 +0000421 }
danielk19770ba87cb2004-11-09 07:42:11 +0000422 statline {Primary pages used} $leaf_pages
423 statline {Overflow pages used} $ovfl_pages
424 statline {Total pages used} $total_pages
drh3e27c022004-07-23 00:01:38 +0000425 if {$int_unused>0} {
drh4515a452011-08-31 17:46:50 +0000426 set int_unused_percent [
427 percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
danielk19770ba87cb2004-11-09 07:42:11 +0000428 statline "Unused bytes on index pages" $int_unused $int_unused_percent
drh3e27c022004-07-23 00:01:38 +0000429 }
drh4515a452011-08-31 17:46:50 +0000430 statline "Unused bytes on primary pages" $leaf_unused [
431 percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
432 statline "Unused bytes on overflow pages" $ovfl_unused [
433 percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
434 statline "Unused bytes on all pages" $total_unused [
435 percent $total_unused $storage {of all space}]
drh3e27c022004-07-23 00:01:38 +0000436 return 1
437}
438
danielk197716254452004-11-08 16:15:09 +0000439# Calculate the overhead in pages caused by auto-vacuum.
440#
441# This procedure calculates and returns the number of pages used by the
442# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
443# then 0 is returned. The two arguments are the size of the database file in
danielk19770ba87cb2004-11-09 07:42:11 +0000444# pages and the page size used by the database (in bytes).
danielk197716254452004-11-08 16:15:09 +0000445proc autovacuum_overhead {filePages pageSize} {
446
dan599e9d22010-07-12 08:39:37 +0000447 # Set $autovacuum to non-zero for databases that support auto-vacuum.
448 set autovacuum [db one {PRAGMA auto_vacuum}]
danielk197716254452004-11-08 16:15:09 +0000449
450 # If the database is not an auto-vacuum database or the file consists
451 # of one page only then there is no overhead for auto-vacuum. Return zero.
dan599e9d22010-07-12 08:39:37 +0000452 if {0==$autovacuum || $filePages==1} {
danielk197716254452004-11-08 16:15:09 +0000453 return 0
454 }
455
456 # The number of entries on each pointer map page. The layout of the
457 # database file is one pointer-map page, followed by $ptrsPerPage other
458 # pages, followed by a pointer-map page etc. The first pointer-map page
459 # is the second page of the file overall.
460 set ptrsPerPage [expr double($pageSize/5)]
461
462 # Return the number of pointer map pages in the database.
drhf08f3842011-09-27 13:40:26 +0000463 return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
danielk197716254452004-11-08 16:15:09 +0000464}
465
danielk197716254452004-11-08 16:15:09 +0000466
danielk19770ba87cb2004-11-09 07:42:11 +0000467# Calculate the summary statistics for the database and store the results
468# in TCL variables. They are output below. Variables are as follows:
danielk197716254452004-11-08 16:15:09 +0000469#
470# pageSize: Size of each page in bytes.
471# file_bytes: File size in bytes.
472# file_pgcnt: Number of pages in the file.
473# file_pgcnt2: Number of pages in the file (calculated).
474# av_pgcnt: Pages consumed by the auto-vacuum pointer-map.
475# av_percent: Percentage of the file consumed by auto-vacuum pointer-map.
476# inuse_pgcnt: Data pages in the file.
477# inuse_percent: Percentage of pages used to store data.
478# free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>)
479# free_pgcnt2: Free pages in the file according to the file header.
480# free_percent: Percentage of file consumed by free pages (calculated).
481# free_percent2: Percentage of file consumed by free pages (header).
482# ntable: Number of tables in the db.
483# nindex: Number of indices in the db.
484# nautoindex: Number of indices created automatically.
485# nmanindex: Number of indices created manually.
danielk19770ba87cb2004-11-09 07:42:11 +0000486# user_payload: Number of bytes of payload in table btrees
487# (not including sqlite_master)
488# user_percent: $user_payload as a percentage of total file size.
danielk197716254452004-11-08 16:15:09 +0000489
dan64b41c72011-09-26 19:32:47 +0000490### The following, setting $file_bytes based on the actual size of the file
491### on disk, causes this tool to choke on zipvfs databases. So set it based
492### on the return of [PRAGMA page_count] instead.
493if 0 {
494 set file_bytes [file size $file_to_analyze]
495 set file_pgcnt [expr {$file_bytes/$pageSize}]
496}
497set file_pgcnt [db one {PRAGMA page_count}]
drhf08f3842011-09-27 13:40:26 +0000498set file_bytes [expr {$file_pgcnt * $pageSize}]
danielk197716254452004-11-08 16:15:09 +0000499
500set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize]
501set av_percent [percent $av_pgcnt $file_pgcnt]
502
danielk19770ba87cb2004-11-09 07:42:11 +0000503set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
drhf08f3842011-09-27 13:40:26 +0000504set inuse_pgcnt [expr wide([mem eval $sql])]
danielk197716254452004-11-08 16:15:09 +0000505set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
506
drhf08f3842011-09-27 13:40:26 +0000507set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
danielk197716254452004-11-08 16:15:09 +0000508set free_percent [percent $free_pgcnt $file_pgcnt]
dan599e9d22010-07-12 08:39:37 +0000509set free_pgcnt2 [db one {PRAGMA freelist_count}]
danielk197716254452004-11-08 16:15:09 +0000510set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
511
512set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
drh3e27c022004-07-23 00:01:38 +0000513
514set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
drh3e27c022004-07-23 00:01:38 +0000515set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
danielk19770ba87cb2004-11-09 07:42:11 +0000516set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
517set nautoindex [db eval $sql]
danielk197716254452004-11-08 16:15:09 +0000518set nmanindex [expr {$nindex-$nautoindex}]
519
520# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
danielk197724c92532005-02-01 10:36:40 +0000521set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
drh3e27c022004-07-23 00:01:38 +0000522 WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
danielk19770ba87cb2004-11-09 07:42:11 +0000523set user_percent [percent $user_payload $file_bytes]
danielk197716254452004-11-08 16:15:09 +0000524
danielk19770ba87cb2004-11-09 07:42:11 +0000525# Output the summary statistics calculated above.
526#
drh453a3122012-10-10 10:52:46 +0000527puts "/** Disk-Space Utilization Report For $root_filename"
danielk19770ba87cb2004-11-09 07:42:11 +0000528puts ""
danielk197716254452004-11-08 16:15:09 +0000529statline {Page size in bytes} $pageSize
530statline {Pages in the whole file (measured)} $file_pgcnt
531statline {Pages in the whole file (calculated)} $file_pgcnt2
532statline {Pages that store data} $inuse_pgcnt $inuse_percent
533statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
534statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
535statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
536statline {Number of tables in the database} $ntable
537statline {Number of indices} $nindex
drh2f312ee2013-09-28 12:40:55 +0000538statline {Number of defined indices} $nmanindex
539statline {Number of implied indices} $nautoindex
drhb7708942011-10-05 18:18:13 +0000540if {$isCompressed} {
541 statline {Size of uncompressed content in bytes} $file_bytes
542 set efficiency [percent $true_file_size $file_bytes]
543 statline {Size of compressed file on disk} $true_file_size $efficiency
544} else {
545 statline {Size of the file in bytes} $file_bytes
546}
danielk19770ba87cb2004-11-09 07:42:11 +0000547statline {Bytes of user payload stored} $user_payload $user_percent
drh3e27c022004-07-23 00:01:38 +0000548
549# Output table rankings
550#
551puts ""
drhcc071302013-07-17 18:12:15 +0000552titleline "Page counts for all tables with their indices"
drh3e27c022004-07-23 00:01:38 +0000553puts ""
554mem eval {SELECT tblname, count(*) AS cnt,
danielk197724c92532005-02-01 10:36:40 +0000555 int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
drhfaf60c72005-03-29 13:18:16 +0000556 FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
drh3e27c022004-07-23 00:01:38 +0000557 statline [string toupper $tblname] $size [percent $size $file_pgcnt]
558}
drhcc071302013-07-17 18:12:15 +0000559puts ""
560titleline "Page counts for all tables and indices separately"
561puts ""
562mem eval {
563 SELECT
564 upper(name) AS nm,
565 int(int_pages+leaf_pages+ovfl_pages) AS size
566 FROM space_used
567 ORDER BY size+0 DESC, name} {} {
568 statline $nm $size [percent $size $file_pgcnt]
569}
drhb7708942011-10-05 18:18:13 +0000570if {$isCompressed} {
571 puts ""
drhcc071302013-07-17 18:12:15 +0000572 titleline "Bytes of disk space used after compression"
drhb7708942011-10-05 18:18:13 +0000573 puts ""
574 set csum 0
575 mem eval {SELECT tblname,
576 int(sum(compressed_size)) +
577 $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
578 AS csize
579 FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
580 incr csum $csize
581 statline [string toupper $tblname] $csize [percent $csize $true_file_size]
582 }
583 set overhead [expr {$true_file_size - $csum}]
584 if {$overhead>0} {
585 statline {Header and free space} $overhead [percent $overhead $true_file_size]
586 }
587}
drh3e27c022004-07-23 00:01:38 +0000588
589# Output subreports
590#
591if {$nindex>0} {
drh2f312ee2013-09-28 12:40:55 +0000592 subreport {All tables and indices} 1 0
drh3e27c022004-07-23 00:01:38 +0000593}
drh2f312ee2013-09-28 12:40:55 +0000594subreport {All tables} {NOT is_index} 0
drh3e27c022004-07-23 00:01:38 +0000595if {$nindex>0} {
drh2f312ee2013-09-28 12:40:55 +0000596 subreport {All indices} {is_index} 0
drh3e27c022004-07-23 00:01:38 +0000597}
drh7913e412013-11-01 20:30:36 +0000598foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used
drh3e27c022004-07-23 00:01:38 +0000599 ORDER BY name}] {
drhcc071302013-07-17 18:12:15 +0000600 set qn [quote $tbl]
drh3e27c022004-07-23 00:01:38 +0000601 set name [string toupper $tbl]
drhcc071302013-07-17 18:12:15 +0000602 set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}]
drh3e27c022004-07-23 00:01:38 +0000603 if {$n>1} {
drhcc071302013-07-17 18:12:15 +0000604 set idxlist [mem eval "SELECT name FROM space_used
605 WHERE tblname='$qn' AND is_index
606 ORDER BY 1"]
drh2f312ee2013-09-28 12:40:55 +0000607 subreport "Table $name and all its indices" "tblname='$qn'" 0
608 subreport "Table $name w/o any indices" "name='$qn'" 1
drhcc071302013-07-17 18:12:15 +0000609 if {[llength $idxlist]>1} {
drh2f312ee2013-09-28 12:40:55 +0000610 subreport "Indices of table $name" "tblname='$qn' AND is_index" 0
drhcc071302013-07-17 18:12:15 +0000611 }
612 foreach idx $idxlist {
613 set qidx [quote $idx]
drh2f312ee2013-09-28 12:40:55 +0000614 subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1
drhcc071302013-07-17 18:12:15 +0000615 }
drh3e27c022004-07-23 00:01:38 +0000616 } else {
drh2f312ee2013-09-28 12:40:55 +0000617 subreport "Table $name" "name='$qn'" 1
drh3e27c022004-07-23 00:01:38 +0000618 }
619}
620
621# Output instructions on what the numbers above mean.
622#
drhcc071302013-07-17 18:12:15 +0000623puts ""
624titleline Definitions
drh3e27c022004-07-23 00:01:38 +0000625puts {
drh3e27c022004-07-23 00:01:38 +0000626Page size in bytes
627
628 The number of bytes in a single page of the database file.
629 Usually 1024.
630
631Number of pages in the whole file
632}
drh4515a452011-08-31 17:46:50 +0000633puts " The number of $pageSize-byte pages that go into forming the complete
drh3e27c022004-07-23 00:01:38 +0000634 database"
drh4515a452011-08-31 17:46:50 +0000635puts {
drh3e27c022004-07-23 00:01:38 +0000636Pages that store data
637
638 The number of pages that store data, either as primary B*Tree pages or
639 as overflow pages. The number at the right is the data pages divided by
640 the total number of pages in the file.
641
642Pages on the freelist
643
644 The number of pages that are not currently in use but are reserved for
645 future use. The percentage at the right is the number of freelist pages
646 divided by the total number of pages in the file.
647
danielk197716254452004-11-08 16:15:09 +0000648Pages of auto-vacuum overhead
649
650 The number of pages that store data used by the database to facilitate
651 auto-vacuum. This is zero for databases that do not support auto-vacuum.
652
drh3e27c022004-07-23 00:01:38 +0000653Number of tables in the database
654
655 The number of tables in the database, including the SQLITE_MASTER table
656 used to store schema information.
657
658Number of indices
659
660 The total number of indices in the database.
661
drh2f312ee2013-09-28 12:40:55 +0000662Number of defined indices
drh3e27c022004-07-23 00:01:38 +0000663
664 The number of indices created using an explicit CREATE INDEX statement.
665
drh2f312ee2013-09-28 12:40:55 +0000666Number of implied indices
drh3e27c022004-07-23 00:01:38 +0000667
668 The number of indices used to implement PRIMARY KEY or UNIQUE constraints
669 on tables.
670
671Size of the file in bytes
672
673 The total amount of disk space used by the entire database files.
674
675Bytes of user payload stored
676
677 The total number of bytes of user payload stored in the database. The
678 schema information in the SQLITE_MASTER table is not counted when
679 computing this number. The percentage at the right shows the payload
680 divided by the total file size.
681
682Percentage of total database
683
684 The amount of the complete database file that is devoted to storing
685 information described by this category.
686
687Number of entries
688
689 The total number of B-Tree key/value pairs stored under this category.
690
691Bytes of storage consumed
692
693 The total amount of disk space required to store all B-Tree entries
694 under this category. The is the total number of pages used times
695 the pages size.
696
697Bytes of payload
698
699 The amount of payload stored under this category. Payload is the data
700 part of table entries and the key part of index entries. The percentage
701 at the right is the bytes of payload divided by the bytes of storage
702 consumed.
703
704Average payload per entry
705
706 The average amount of payload on each entry. This is just the bytes of
707 payload divided by the number of entries.
708
709Average unused bytes per entry
710
711 The average amount of free space remaining on all pages under this
712 category on a per-entry basis. This is the number of unused bytes on
713 all pages divided by the number of entries.
714
drh2f312ee2013-09-28 12:40:55 +0000715Non-sequential pages
drhfc6e0c92007-02-13 01:41:52 +0000716
drh2f312ee2013-09-28 12:40:55 +0000717 The number of pages in the table or index that are out of sequence.
718 Many filesystems are optimized for sequential file access so a small
719 number of non-sequential pages might result in faster queries,
720 especially for larger database files that do not fit in the disk cache.
721 Note that after running VACUUM, the root page of each table or index is
722 at the beginning of the database file and all other pages are in a
723 separate part of the database file, resulting in a single non-
724 sequential page.
drhfc6e0c92007-02-13 01:41:52 +0000725
drh3e27c022004-07-23 00:01:38 +0000726Maximum payload per entry
727
728 The largest payload size of any entry.
729
730Entries that use overflow
731
732 The number of entries that user one or more overflow pages.
733
734Total pages used
735
736 This is the number of pages used to hold all information in the current
737 category. This is the sum of index, primary, and overflow pages.
738
739Index pages used
740
741 This is the number of pages in a table B-tree that hold only key (rowid)
742 information and no data.
743
744Primary pages used
745
746 This is the number of B-tree pages that hold both key and data.
747
748Overflow pages used
749
750 The total number of overflow pages used for this category.
751
752Unused bytes on index pages
753
754 The total number of bytes of unused space on all index pages. The
755 percentage at the right is the number of unused bytes divided by the
756 total number of bytes on index pages.
757
758Unused bytes on primary pages
759
760 The total number of bytes of unused space on all primary pages. The
761 percentage at the right is the number of unused bytes divided by the
762 total number of bytes on primary pages.
763
764Unused bytes on overflow pages
765
766 The total number of bytes of unused space on all overflow pages. The
767 percentage at the right is the number of unused bytes divided by the
768 total number of bytes on overflow pages.
769
770Unused bytes on all pages
771
772 The total number of bytes of unused space on all primary and overflow
773 pages. The percentage at the right is the number of unused bytes
774 divided by the total number of bytes.
775}
776
danielk19770ba87cb2004-11-09 07:42:11 +0000777# Output a dump of the in-memory database. This can be used for more
778# complex offline analysis.
drh3e27c022004-07-23 00:01:38 +0000779#
drhcc071302013-07-17 18:12:15 +0000780titleline {}
drh3e27c022004-07-23 00:01:38 +0000781puts "The entire text of this report can be sourced into any SQL database"
782puts "engine for further analysis. All of the text above is an SQL comment."
783puts "The data used to generate this report follows:"
784puts "*/"
785puts "BEGIN;"
786puts $tabledef
787unset -nocomplain x
788mem eval {SELECT * FROM space_used} x {
789 puts -nonewline "INSERT INTO space_used VALUES"
790 set sep (
791 foreach col $x(*) {
792 set v $x($col)
drha4641712013-11-02 11:34:58 +0000793 if {$v=="" || ![string is double $v]} {set v '[quote $v]'}
drh3e27c022004-07-23 00:01:38 +0000794 puts -nonewline $sep$v
795 set sep ,
796 }
797 puts ");"
798}
799puts "COMMIT;"
drha7531c62006-01-24 02:19:53 +0000800
801} err]} {
802 puts "ERROR: $err"
803 puts $errorInfo
804 exit 1
805}