blob: 9db108dc26bc802a899b8b29a2788dd3677f2013 [file] [log] [blame]
drh3e27c022004-07-23 00:01:38 +00001# Run this TCL script using "testfixture" in order get a report that shows
2# how much disk space is used by a particular data to actually store data
3# versus how much space is unused.
4#
5
drha7531c62006-01-24 02:19:53 +00006if {[catch {
dan9fab5ed2015-02-09 17:46:11 +00007
8# Argument $tname is the name of a table within the database opened by
9# database handle [db]. Return true if it is a WITHOUT ROWID table, or
10# false otherwise.
11#
12proc is_without_rowid {tname} {
13 set t [string map {' ''} $tname]
14 db eval "PRAGMA index_list = '$t'" o {
15 if {$o(origin) == "pk"} {
16 set n $o(name)
17 if {0==[db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]} {
18 return 1
19 }
20 }
21 }
22 return 0
23}
24
drh43269742016-10-12 18:26:26 +000025# Read and run TCL commands from standard input. Used to implement
26# the --tclsh option.
27#
28proc tclsh {} {
29 set line {}
30 while {![eof stdin]} {
31 if {$line!=""} {
32 puts -nonewline "> "
33 } else {
34 puts -nonewline "% "
35 }
36 flush stdout
37 append line [gets stdin]
38 if {[info complete $line]} {
39 if {[catch {uplevel #0 $line} result]} {
40 puts stderr "Error: $result"
41 } elseif {$result!=""} {
42 puts $result
43 }
44 set line {}
45 } else {
46 append line \n
47 }
48 }
49}
50
51
drh3e27c022004-07-23 00:01:38 +000052# Get the name of the database to analyze
53#
drhb7708942011-10-05 18:18:13 +000054proc usage {} {
55 set argv0 [file rootname [file tail [info nameofexecutable]]]
drh310a8d62015-09-08 17:31:30 +000056 puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename"
57 puts stderr {
58Analyze the SQLite3 database file specified by the "database-filename"
59argument and output a report detailing size and storage efficiency
60information for the database and its constituent tables and indexes.
61
62Options:
63
drh43269742016-10-12 18:26:26 +000064 --pageinfo Show how each page of the database-file is used
drh310a8d62015-09-08 17:31:30 +000065
drh43269742016-10-12 18:26:26 +000066 --stats Output SQL text that creates a new database containing
67 statistics about the database that was analyzed
68
69 --tclsh Run the built-in TCL interpreter interactively (for debugging)
70
71 --version Show the version number of SQLite
drh310a8d62015-09-08 17:31:30 +000072}
drh3e27c022004-07-23 00:01:38 +000073 exit 1
74}
drhb7708942011-10-05 18:18:13 +000075set file_to_analyze {}
76set flags(-pageinfo) 0
drha624fd52011-10-05 19:46:03 +000077set flags(-stats) 0
drh43269742016-10-12 18:26:26 +000078set flags(-debug) 0
drhb7708942011-10-05 18:18:13 +000079append argv {}
80foreach arg $argv {
81 if {[regexp {^-+pageinfo$} $arg]} {
82 set flags(-pageinfo) 1
drha624fd52011-10-05 19:46:03 +000083 } elseif {[regexp {^-+stats$} $arg]} {
84 set flags(-stats) 1
drh43269742016-10-12 18:26:26 +000085 } elseif {[regexp {^-+debug$} $arg]} {
86 set flags(-debug) 1
87 } elseif {[regexp {^-+tclsh$} $arg]} {
88 tclsh
89 exit 0
90 } elseif {[regexp {^-+version$} $arg]} {
91 sqlite3 mem :memory:
92 puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}]
93 mem close
94 exit 0
drhb7708942011-10-05 18:18:13 +000095 } elseif {[regexp {^-} $arg]} {
96 puts stderr "Unknown option: $arg"
97 usage
98 } elseif {$file_to_analyze!=""} {
99 usage
100 } else {
101 set file_to_analyze $arg
102 }
103}
104if {$file_to_analyze==""} usage
drh453a3122012-10-10 10:52:46 +0000105set root_filename $file_to_analyze
106regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename
107if {![file exists $root_filename]} {
108 puts stderr "No such file: $root_filename"
drh3e27c022004-07-23 00:01:38 +0000109 exit 1
110}
drh453a3122012-10-10 10:52:46 +0000111if {![file readable $root_filename]} {
112 puts stderr "File is not readable: $root_filename"
drh3e27c022004-07-23 00:01:38 +0000113 exit 1
114}
drh453a3122012-10-10 10:52:46 +0000115set true_file_size [file size $root_filename]
drhb7708942011-10-05 18:18:13 +0000116if {$true_file_size<512} {
drh453a3122012-10-10 10:52:46 +0000117 puts stderr "Empty or malformed database: $root_filename"
drh3e27c022004-07-23 00:01:38 +0000118 exit 1
119}
120
drh36c06322011-10-10 16:06:35 +0000121# Compute the total file size assuming test_multiplexor is being used.
122# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
123#
drh453a3122012-10-10 10:52:46 +0000124set extension [file extension $root_filename]
125set pattern $root_filename
drh37002622012-04-06 00:09:27 +0000126append pattern {[0-3][0-9][0-9]}
drh36c06322011-10-10 16:06:35 +0000127foreach f [glob -nocomplain $pattern] {
128 incr true_file_size [file size $f]
129 set extension {}
130}
131if {[string length $extension]>=2 && [string length $extension]<=4} {
drh453a3122012-10-10 10:52:46 +0000132 set pattern [file rootname $root_filename]
drh37002622012-04-06 00:09:27 +0000133 append pattern {.[0-3][0-9][0-9]}
drh36c06322011-10-10 16:06:35 +0000134 foreach f [glob -nocomplain $pattern] {
135 incr true_file_size [file size $f]
136 }
137}
138
drh3e27c022004-07-23 00:01:38 +0000139# Open the database
140#
drh453a3122012-10-10 10:52:46 +0000141if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} {
142 puts stderr "error trying to open $file_to_analyze: $msg"
143 exit 1
144}
drh43269742016-10-12 18:26:26 +0000145if {$flags(-debug)} {
146 proc dbtrace {txt} {puts $txt; flush stdout;}
147 db trace ::dbtrace
148}
dan599e9d22010-07-12 08:39:37 +0000149
drh565621a2011-09-21 20:10:42 +0000150db eval {SELECT count(*) FROM sqlite_master}
drhf08f3842011-09-27 13:40:26 +0000151set pageSize [expr {wide([db one {PRAGMA page_size}])}]
drh3e27c022004-07-23 00:01:38 +0000152
drha624fd52011-10-05 19:46:03 +0000153if {$flags(-pageinfo)} {
154 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
155 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
156 puts "$pageno $name $path"
157 }
158 exit 0
159}
160if {$flags(-stats)} {
161 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
162 puts "BEGIN;"
163 puts "CREATE TABLE stats("
164 puts " name STRING, /* Name of table or index */"
165 puts " path INTEGER, /* Path to page from root */"
166 puts " pageno INTEGER, /* Page number */"
167 puts " pagetype STRING, /* 'internal', 'leaf' or 'overflow' */"
168 puts " ncell INTEGER, /* Cells on page (0 for overflow) */"
169 puts " payload INTEGER, /* Bytes of payload on this page */"
170 puts " unused INTEGER, /* Bytes of unused space on this page */"
171 puts " mx_payload INTEGER, /* Largest payload size of all cells */"
172 puts " pgoffset INTEGER, /* Offset of page in file */"
173 puts " pgsize INTEGER /* Size of the page */"
174 puts ");"
175 db eval {SELECT quote(name) || ',' ||
176 quote(path) || ',' ||
177 quote(pageno) || ',' ||
178 quote(pagetype) || ',' ||
179 quote(ncell) || ',' ||
180 quote(payload) || ',' ||
181 quote(unused) || ',' ||
182 quote(mx_payload) || ',' ||
183 quote(pgoffset) || ',' ||
184 quote(pgsize) AS x FROM stat} {
185 puts "INSERT INTO stats VALUES($x);"
186 }
187 puts "COMMIT;"
188 exit 0
189}
190
drh43269742016-10-12 18:26:26 +0000191
danielk19770ba87cb2004-11-09 07:42:11 +0000192# In-memory database for collecting statistics. This script loops through
193# the tables and indices in the database being analyzed, adding a row for each
194# to an in-memory database (for which the schema is shown below). It then
195# queries the in-memory db to produce the space-analysis report.
drh3e27c022004-07-23 00:01:38 +0000196#
197sqlite3 mem :memory:
drh43269742016-10-12 18:26:26 +0000198if {$flags(-debug)} {
199 proc dbtrace {txt} {puts $txt; flush stdout;}
200 mem trace ::dbtrace
201}
drh4515a452011-08-31 17:46:50 +0000202set tabledef {CREATE TABLE space_used(
drh3e27c022004-07-23 00:01:38 +0000203 name clob, -- Name of a table or index in the database file
204 tblname clob, -- Name of associated table
205 is_index boolean, -- TRUE if it is an index, false for a table
dand49c6932016-05-06 15:16:02 +0000206 is_without_rowid boolean, -- TRUE if WITHOUT ROWID table
drh3e27c022004-07-23 00:01:38 +0000207 nentry int, -- Number of entries in the BTree
208 leaf_entries int, -- Number of leaf entries
drh8fb6c432015-08-04 14:18:10 +0000209 depth int, -- Depth of the b-tree
drh3e27c022004-07-23 00:01:38 +0000210 payload int, -- Total amount of data stored in this table or index
211 ovfl_payload int, -- Total amount of data stored on overflow pages
212 ovfl_cnt int, -- Number of entries that use overflow
213 mx_payload int, -- Maximum payload size
214 int_pages int, -- Number of interior pages used
215 leaf_pages int, -- Number of leaf pages used
216 ovfl_pages int, -- Number of overflow pages used
217 int_unused int, -- Number of unused bytes on interior pages
218 leaf_unused int, -- Number of unused bytes on primary pages
drh50c67062007-02-10 19:22:35 +0000219 ovfl_unused int, -- Number of unused bytes on overflow pages
drh4c9f1292011-09-28 00:50:14 +0000220 gap_cnt int, -- Number of gaps in the page layout
221 compressed_size int -- Total bytes stored on disk
drh3e27c022004-07-23 00:01:38 +0000222);}
223mem eval $tabledef
224
dan599e9d22010-07-12 08:39:37 +0000225# Create a temporary "dbstat" virtual table.
226#
drh565621a2011-09-21 20:10:42 +0000227db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
228db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
229 ORDER BY name, path}
230db eval {DROP TABLE temp.stat}
dan599e9d22010-07-12 08:39:37 +0000231
drhb7708942011-10-05 18:18:13 +0000232set isCompressed 0
233set compressOverhead 0
drh8fb6c432015-08-04 14:18:10 +0000234set depth 0
dan599e9d22010-07-12 08:39:37 +0000235set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
236foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {
237
238 set is_index [expr {$name!=$tblname}]
dand49c6932016-05-06 15:16:02 +0000239 set is_without_rowid [is_without_rowid $name]
dan599e9d22010-07-12 08:39:37 +0000240 db eval {
241 SELECT
242 sum(ncell) AS nentry,
drh8fb6c432015-08-04 14:18:10 +0000243 sum((pagetype=='leaf')*ncell) AS leaf_entries,
dan599e9d22010-07-12 08:39:37 +0000244 sum(payload) AS payload,
drh8fb6c432015-08-04 14:18:10 +0000245 sum((pagetype=='overflow') * payload) AS ovfl_payload,
dan599e9d22010-07-12 08:39:37 +0000246 sum(path LIKE '%+000000') AS ovfl_cnt,
247 max(mx_payload) AS mx_payload,
drh8fb6c432015-08-04 14:18:10 +0000248 sum(pagetype=='internal') AS int_pages,
249 sum(pagetype=='leaf') AS leaf_pages,
250 sum(pagetype=='overflow') AS ovfl_pages,
251 sum((pagetype=='internal') * unused) AS int_unused,
252 sum((pagetype=='leaf') * unused) AS leaf_unused,
253 sum((pagetype=='overflow') * unused) AS ovfl_unused,
254 sum(pgsize) AS compressed_size,
255 max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4)
256 AS depth
dan599e9d22010-07-12 08:39:37 +0000257 FROM temp.dbstat WHERE name = $name
258 } break
259
drhb7708942011-10-05 18:18:13 +0000260 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
261 set storage [expr {$total_pages*$pageSize}]
262 if {!$isCompressed && $storage>$compressed_size} {
263 set isCompressed 1
264 set compressOverhead 14
265 }
266
dan599e9d22010-07-12 08:39:37 +0000267 # Column 'gap_cnt' is set to the number of non-contiguous entries in the
268 # list of pages visited if the b-tree structure is traversed in a top-down
269 # fashion (each node visited before its child-tree is passed). Any overflow
270 # chains present are traversed from start to finish before any child-tree
271 # is.
272 #
273 set gap_cnt 0
drh2f312ee2013-09-28 12:40:55 +0000274 set prev 0
275 db eval {
276 SELECT pageno, pagetype FROM temp.dbstat
277 WHERE name=$name
278 ORDER BY pageno
279 } {
280 if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} {
281 incr gap_cnt
282 }
283 set prev $pageno
dan599e9d22010-07-12 08:39:37 +0000284 }
dan599e9d22010-07-12 08:39:37 +0000285 mem eval {
286 INSERT INTO space_used VALUES(
287 $name,
288 $tblname,
289 $is_index,
dand49c6932016-05-06 15:16:02 +0000290 $is_without_rowid,
dan599e9d22010-07-12 08:39:37 +0000291 $nentry,
292 $leaf_entries,
drh8fb6c432015-08-04 14:18:10 +0000293 $depth,
dan599e9d22010-07-12 08:39:37 +0000294 $payload,
295 $ovfl_payload,
296 $ovfl_cnt,
297 $mx_payload,
298 $int_pages,
299 $leaf_pages,
300 $ovfl_pages,
301 $int_unused,
302 $leaf_unused,
303 $ovfl_unused,
drh4c9f1292011-09-28 00:50:14 +0000304 $gap_cnt,
305 $compressed_size
dan599e9d22010-07-12 08:39:37 +0000306 );
307 }
308}
309
danielk197724c92532005-02-01 10:36:40 +0000310proc integerify {real} {
drh03496882007-12-04 13:41:51 +0000311 if {[string is double -strict $real]} {
drhf08f3842011-09-27 13:40:26 +0000312 return [expr {wide($real)}]
drh03496882007-12-04 13:41:51 +0000313 } else {
314 return 0
315 }
danielk197724c92532005-02-01 10:36:40 +0000316}
317mem function int integerify
318
danielk19770ba87cb2004-11-09 07:42:11 +0000319# Quote a string for use in an SQL query. Examples:
drh3e27c022004-07-23 00:01:38 +0000320#
danielk19770ba87cb2004-11-09 07:42:11 +0000321# [quote {hello world}] == {'hello world'}
322# [quote {hello world's}] == {'hello world''s'}
drh3e27c022004-07-23 00:01:38 +0000323#
danielk19770ba87cb2004-11-09 07:42:11 +0000324proc quote {txt} {
drhcc071302013-07-17 18:12:15 +0000325 return [string map {' ''} $txt]
326}
327
328# Output a title line
329#
330proc titleline {title} {
331 if {$title==""} {
332 puts [string repeat * 79]
333 } else {
334 set len [string length $title]
335 set stars [string repeat * [expr 79-$len-5]]
336 puts "*** $title $stars"
337 }
drh3e27c022004-07-23 00:01:38 +0000338}
339
drh3e27c022004-07-23 00:01:38 +0000340# Generate a single line of output in the statistics section of the
341# report.
342#
343proc statline {title value {extra {}}} {
344 set len [string length $title]
drhcc071302013-07-17 18:12:15 +0000345 set dots [string repeat . [expr 50-$len]]
drh3e27c022004-07-23 00:01:38 +0000346 set len [string length $value]
347 set sp2 [string range { } $len end]
348 if {$extra ne ""} {
349 set extra " $extra"
350 }
351 puts "$title$dots $value$sp2$extra"
352}
353
354# Generate a formatted percentage value for $num/$denom
355#
356proc percent {num denom {of {}}} {
357 if {$denom==0.0} {return ""}
358 set v [expr {$num*100.0/$denom}]
359 set of {}
drhfaf60c72005-03-29 13:18:16 +0000360 if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
drh3e27c022004-07-23 00:01:38 +0000361 return [format {%5.1f%% %s} $v $of]
362 } elseif {$v<0.1 || $v>99.9} {
363 return [format {%7.3f%% %s} $v $of]
364 } else {
365 return [format {%6.2f%% %s} $v $of]
366 }
367}
368
danielk197724c92532005-02-01 10:36:40 +0000369proc divide {num denom} {
370 if {$denom==0} {return 0.0}
371 return [format %.2f [expr double($num)/double($denom)]]
372}
373
drh3e27c022004-07-23 00:01:38 +0000374# Generate a subreport that covers some subset of the database.
375# the $where clause determines which subset to analyze.
376#
drh2f312ee2013-09-28 12:40:55 +0000377proc subreport {title where showFrag} {
drhb7708942011-10-05 18:18:13 +0000378 global pageSize file_pgcnt compressOverhead
danielk19770ba87cb2004-11-09 07:42:11 +0000379
380 # Query the in-memory database for the sum of various statistics
381 # for the subset of tables/indices identified by the WHERE clause in
382 # $where. Note that even if the WHERE clause matches no rows, the
383 # following query returns exactly one row (because it is an aggregate).
384 #
385 # The results of the query are stored directly by SQLite into local
dand49c6932016-05-06 15:16:02 +0000386 # variables (i.e. $nentry, $payload etc.).
danielk19770ba87cb2004-11-09 07:42:11 +0000387 #
drh3e27c022004-07-23 00:01:38 +0000388 mem eval "
389 SELECT
dand49c6932016-05-06 15:16:02 +0000390 int(sum(
391 CASE WHEN (is_without_rowid OR is_index) THEN nentry
392 ELSE leaf_entries
393 END
394 )) AS nentry,
danielk197724c92532005-02-01 10:36:40 +0000395 int(sum(payload)) AS payload,
396 int(sum(ovfl_payload)) AS ovfl_payload,
drh3e27c022004-07-23 00:01:38 +0000397 max(mx_payload) AS mx_payload,
danielk197724c92532005-02-01 10:36:40 +0000398 int(sum(ovfl_cnt)) as ovfl_cnt,
399 int(sum(leaf_pages)) AS leaf_pages,
400 int(sum(int_pages)) AS int_pages,
401 int(sum(ovfl_pages)) AS ovfl_pages,
402 int(sum(leaf_unused)) AS leaf_unused,
403 int(sum(int_unused)) AS int_unused,
drh50c67062007-02-10 19:22:35 +0000404 int(sum(ovfl_unused)) AS ovfl_unused,
drh4c9f1292011-09-28 00:50:14 +0000405 int(sum(gap_cnt)) AS gap_cnt,
drh8fb6c432015-08-04 14:18:10 +0000406 int(sum(compressed_size)) AS compressed_size,
407 int(max(depth)) AS depth,
408 count(*) AS cnt
danielk19770ba87cb2004-11-09 07:42:11 +0000409 FROM space_used WHERE $where" {} {}
410
411 # Output the sub-report title, nicely decorated with * characters.
412 #
drh3e27c022004-07-23 00:01:38 +0000413 puts ""
drhcc071302013-07-17 18:12:15 +0000414 titleline $title
drh3e27c022004-07-23 00:01:38 +0000415 puts ""
danielk19770ba87cb2004-11-09 07:42:11 +0000416
417 # Calculate statistics and store the results in TCL variables, as follows:
418 #
419 # total_pages: Database pages consumed.
420 # total_pages_percent: Pages consumed as a percentage of the file.
421 # storage: Bytes consumed.
422 # payload_percent: Payload bytes used as a percentage of $storage.
423 # total_unused: Unused bytes on pages.
424 # avg_payload: Average payload per btree entry.
425 # avg_fanout: Average fanout for internal pages.
426 # avg_unused: Average unused bytes per btree entry.
drhc74d9622017-06-15 00:52:03 +0000427 # avg_meta: Average metadata overhead per entry.
danielk19770ba87cb2004-11-09 07:42:11 +0000428 # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
429 #
drh3e27c022004-07-23 00:01:38 +0000430 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
danielk19770ba87cb2004-11-09 07:42:11 +0000431 set total_pages_percent [percent $total_pages $file_pgcnt]
drh3e27c022004-07-23 00:01:38 +0000432 set storage [expr {$total_pages*$pageSize}]
danielk19770ba87cb2004-11-09 07:42:11 +0000433 set payload_percent [percent $payload $storage {of storage consumed}]
434 set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
dand49c6932016-05-06 15:16:02 +0000435 set avg_payload [divide $payload $nentry]
436 set avg_unused [divide $total_unused $nentry]
drhc74d9622017-06-15 00:52:03 +0000437 set total_meta [expr {$storage - $payload - $total_unused}]
438 set total_meta [expr {$total_meta + 4*($ovfl_pages - $ovfl_cnt)}]
439 set meta_percent [percent $total_meta $storage {of metadata}]
440 set avg_meta [divide $total_meta $nentry]
drh3e27c022004-07-23 00:01:38 +0000441 if {$int_pages>0} {
danielk19770ba87cb2004-11-09 07:42:11 +0000442 # TODO: Is this formula correct?
danielk197724c92532005-02-01 10:36:40 +0000443 set nTab [mem eval "
444 SELECT count(*) FROM (
445 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
446 )
447 "]
448 set avg_fanout [mem eval "
449 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
drh8fb6c432015-08-04 14:18:10 +0000450 WHERE $where
danielk197724c92532005-02-01 10:36:40 +0000451 "]
452 set avg_fanout [format %.2f $avg_fanout]
drh3e27c022004-07-23 00:01:38 +0000453 }
dand49c6932016-05-06 15:16:02 +0000454 set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}]
danielk19770ba87cb2004-11-09 07:42:11 +0000455
456 # Print out the sub-report statistics.
457 #
458 statline {Percentage of total database} $total_pages_percent
dand49c6932016-05-06 15:16:02 +0000459 statline {Number of entries} $nentry
danielk19770ba87cb2004-11-09 07:42:11 +0000460 statline {Bytes of storage consumed} $storage
drh4c9f1292011-09-28 00:50:14 +0000461 if {$compressed_size!=$storage} {
drhb7708942011-10-05 18:18:13 +0000462 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
drh4c9f1292011-09-28 00:50:14 +0000463 set pct [expr {$compressed_size*100.0/$storage}]
464 set pct [format {%5.1f%%} $pct]
465 statline {Bytes used after compression} $compressed_size $pct
466 }
danielk19770ba87cb2004-11-09 07:42:11 +0000467 statline {Bytes of payload} $payload $payload_percent
drhc74d9622017-06-15 00:52:03 +0000468 statline {Bytes of metadata} $total_meta $meta_percent
drh8fb6c432015-08-04 14:18:10 +0000469 if {$cnt==1} {statline {B-tree depth} $depth}
danielk19770ba87cb2004-11-09 07:42:11 +0000470 statline {Average payload per entry} $avg_payload
471 statline {Average unused bytes per entry} $avg_unused
drhc74d9622017-06-15 00:52:03 +0000472 statline {Average metadata per entry} $avg_meta
danielk19770ba87cb2004-11-09 07:42:11 +0000473 if {[info exists avg_fanout]} {
474 statline {Average fanout} $avg_fanout
475 }
drh2f312ee2013-09-28 12:40:55 +0000476 if {$showFrag && $total_pages>1} {
477 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]]
478 statline {Non-sequential pages} $gap_cnt $fragmentation
drh50c67062007-02-10 19:22:35 +0000479 }
danielk19770ba87cb2004-11-09 07:42:11 +0000480 statline {Maximum payload per entry} $mx_payload
481 statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
drh3e27c022004-07-23 00:01:38 +0000482 if {$int_pages>0} {
danielk19770ba87cb2004-11-09 07:42:11 +0000483 statline {Index pages used} $int_pages
drh3e27c022004-07-23 00:01:38 +0000484 }
danielk19770ba87cb2004-11-09 07:42:11 +0000485 statline {Primary pages used} $leaf_pages
486 statline {Overflow pages used} $ovfl_pages
487 statline {Total pages used} $total_pages
drh3e27c022004-07-23 00:01:38 +0000488 if {$int_unused>0} {
drh4515a452011-08-31 17:46:50 +0000489 set int_unused_percent [
490 percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
danielk19770ba87cb2004-11-09 07:42:11 +0000491 statline "Unused bytes on index pages" $int_unused $int_unused_percent
drh3e27c022004-07-23 00:01:38 +0000492 }
drh4515a452011-08-31 17:46:50 +0000493 statline "Unused bytes on primary pages" $leaf_unused [
494 percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
495 statline "Unused bytes on overflow pages" $ovfl_unused [
496 percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
497 statline "Unused bytes on all pages" $total_unused [
498 percent $total_unused $storage {of all space}]
drh3e27c022004-07-23 00:01:38 +0000499 return 1
500}
501
danielk197716254452004-11-08 16:15:09 +0000502# Calculate the overhead in pages caused by auto-vacuum.
503#
504# This procedure calculates and returns the number of pages used by the
505# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
506# then 0 is returned. The two arguments are the size of the database file in
danielk19770ba87cb2004-11-09 07:42:11 +0000507# pages and the page size used by the database (in bytes).
danielk197716254452004-11-08 16:15:09 +0000508proc autovacuum_overhead {filePages pageSize} {
509
dan599e9d22010-07-12 08:39:37 +0000510 # Set $autovacuum to non-zero for databases that support auto-vacuum.
511 set autovacuum [db one {PRAGMA auto_vacuum}]
danielk197716254452004-11-08 16:15:09 +0000512
513 # If the database is not an auto-vacuum database or the file consists
514 # of one page only then there is no overhead for auto-vacuum. Return zero.
dan599e9d22010-07-12 08:39:37 +0000515 if {0==$autovacuum || $filePages==1} {
danielk197716254452004-11-08 16:15:09 +0000516 return 0
517 }
518
519 # The number of entries on each pointer map page. The layout of the
520 # database file is one pointer-map page, followed by $ptrsPerPage other
521 # pages, followed by a pointer-map page etc. The first pointer-map page
522 # is the second page of the file overall.
523 set ptrsPerPage [expr double($pageSize/5)]
524
525 # Return the number of pointer map pages in the database.
drhf08f3842011-09-27 13:40:26 +0000526 return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
danielk197716254452004-11-08 16:15:09 +0000527}
528
danielk197716254452004-11-08 16:15:09 +0000529
danielk19770ba87cb2004-11-09 07:42:11 +0000530# Calculate the summary statistics for the database and store the results
531# in TCL variables. They are output below. Variables are as follows:
danielk197716254452004-11-08 16:15:09 +0000532#
533# pageSize: Size of each page in bytes.
534# file_bytes: File size in bytes.
535# file_pgcnt: Number of pages in the file.
536# file_pgcnt2: Number of pages in the file (calculated).
537# av_pgcnt: Pages consumed by the auto-vacuum pointer-map.
538# av_percent: Percentage of the file consumed by auto-vacuum pointer-map.
539# inuse_pgcnt: Data pages in the file.
540# inuse_percent: Percentage of pages used to store data.
541# free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>)
542# free_pgcnt2: Free pages in the file according to the file header.
543# free_percent: Percentage of file consumed by free pages (calculated).
544# free_percent2: Percentage of file consumed by free pages (header).
545# ntable: Number of tables in the db.
546# nindex: Number of indices in the db.
547# nautoindex: Number of indices created automatically.
548# nmanindex: Number of indices created manually.
danielk19770ba87cb2004-11-09 07:42:11 +0000549# user_payload: Number of bytes of payload in table btrees
550# (not including sqlite_master)
551# user_percent: $user_payload as a percentage of total file size.
danielk197716254452004-11-08 16:15:09 +0000552
dan64b41c72011-09-26 19:32:47 +0000553### The following, setting $file_bytes based on the actual size of the file
554### on disk, causes this tool to choke on zipvfs databases. So set it based
555### on the return of [PRAGMA page_count] instead.
556if 0 {
557 set file_bytes [file size $file_to_analyze]
558 set file_pgcnt [expr {$file_bytes/$pageSize}]
559}
560set file_pgcnt [db one {PRAGMA page_count}]
drhf08f3842011-09-27 13:40:26 +0000561set file_bytes [expr {$file_pgcnt * $pageSize}]
danielk197716254452004-11-08 16:15:09 +0000562
563set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize]
564set av_percent [percent $av_pgcnt $file_pgcnt]
565
danielk19770ba87cb2004-11-09 07:42:11 +0000566set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
drhf08f3842011-09-27 13:40:26 +0000567set inuse_pgcnt [expr wide([mem eval $sql])]
danielk197716254452004-11-08 16:15:09 +0000568set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
569
drhf08f3842011-09-27 13:40:26 +0000570set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
danielk197716254452004-11-08 16:15:09 +0000571set free_percent [percent $free_pgcnt $file_pgcnt]
dan599e9d22010-07-12 08:39:37 +0000572set free_pgcnt2 [db one {PRAGMA freelist_count}]
danielk197716254452004-11-08 16:15:09 +0000573set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
574
575set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
drh3e27c022004-07-23 00:01:38 +0000576
577set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
drh3e27c022004-07-23 00:01:38 +0000578set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
danielk19770ba87cb2004-11-09 07:42:11 +0000579set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
580set nautoindex [db eval $sql]
danielk197716254452004-11-08 16:15:09 +0000581set nmanindex [expr {$nindex-$nautoindex}]
582
583# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
danielk197724c92532005-02-01 10:36:40 +0000584set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
drh3e27c022004-07-23 00:01:38 +0000585 WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
danielk19770ba87cb2004-11-09 07:42:11 +0000586set user_percent [percent $user_payload $file_bytes]
danielk197716254452004-11-08 16:15:09 +0000587
danielk19770ba87cb2004-11-09 07:42:11 +0000588# Output the summary statistics calculated above.
589#
drh453a3122012-10-10 10:52:46 +0000590puts "/** Disk-Space Utilization Report For $root_filename"
danielk19770ba87cb2004-11-09 07:42:11 +0000591puts ""
danielk197716254452004-11-08 16:15:09 +0000592statline {Page size in bytes} $pageSize
593statline {Pages in the whole file (measured)} $file_pgcnt
594statline {Pages in the whole file (calculated)} $file_pgcnt2
595statline {Pages that store data} $inuse_pgcnt $inuse_percent
596statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
597statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
598statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
599statline {Number of tables in the database} $ntable
600statline {Number of indices} $nindex
drh2f312ee2013-09-28 12:40:55 +0000601statline {Number of defined indices} $nmanindex
602statline {Number of implied indices} $nautoindex
drhb7708942011-10-05 18:18:13 +0000603if {$isCompressed} {
604 statline {Size of uncompressed content in bytes} $file_bytes
605 set efficiency [percent $true_file_size $file_bytes]
606 statline {Size of compressed file on disk} $true_file_size $efficiency
607} else {
608 statline {Size of the file in bytes} $file_bytes
609}
danielk19770ba87cb2004-11-09 07:42:11 +0000610statline {Bytes of user payload stored} $user_payload $user_percent
drh3e27c022004-07-23 00:01:38 +0000611
612# Output table rankings
613#
614puts ""
drhcc071302013-07-17 18:12:15 +0000615titleline "Page counts for all tables with their indices"
drh3e27c022004-07-23 00:01:38 +0000616puts ""
617mem eval {SELECT tblname, count(*) AS cnt,
danielk197724c92532005-02-01 10:36:40 +0000618 int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
drhfaf60c72005-03-29 13:18:16 +0000619 FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
drh3e27c022004-07-23 00:01:38 +0000620 statline [string toupper $tblname] $size [percent $size $file_pgcnt]
621}
drhcc071302013-07-17 18:12:15 +0000622puts ""
623titleline "Page counts for all tables and indices separately"
624puts ""
625mem eval {
626 SELECT
627 upper(name) AS nm,
628 int(int_pages+leaf_pages+ovfl_pages) AS size
629 FROM space_used
630 ORDER BY size+0 DESC, name} {} {
631 statline $nm $size [percent $size $file_pgcnt]
632}
drhb7708942011-10-05 18:18:13 +0000633if {$isCompressed} {
634 puts ""
drhcc071302013-07-17 18:12:15 +0000635 titleline "Bytes of disk space used after compression"
drhb7708942011-10-05 18:18:13 +0000636 puts ""
637 set csum 0
638 mem eval {SELECT tblname,
639 int(sum(compressed_size)) +
640 $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
641 AS csize
642 FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
643 incr csum $csize
644 statline [string toupper $tblname] $csize [percent $csize $true_file_size]
645 }
646 set overhead [expr {$true_file_size - $csum}]
647 if {$overhead>0} {
648 statline {Header and free space} $overhead [percent $overhead $true_file_size]
649 }
650}
drh3e27c022004-07-23 00:01:38 +0000651
652# Output subreports
653#
654if {$nindex>0} {
drh2f312ee2013-09-28 12:40:55 +0000655 subreport {All tables and indices} 1 0
drh3e27c022004-07-23 00:01:38 +0000656}
drh2f312ee2013-09-28 12:40:55 +0000657subreport {All tables} {NOT is_index} 0
drh3e27c022004-07-23 00:01:38 +0000658if {$nindex>0} {
drh2f312ee2013-09-28 12:40:55 +0000659 subreport {All indices} {is_index} 0
drh3e27c022004-07-23 00:01:38 +0000660}
drh7913e412013-11-01 20:30:36 +0000661foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used
drh3e27c022004-07-23 00:01:38 +0000662 ORDER BY name}] {
drhcc071302013-07-17 18:12:15 +0000663 set qn [quote $tbl]
drh3e27c022004-07-23 00:01:38 +0000664 set name [string toupper $tbl]
drhcc071302013-07-17 18:12:15 +0000665 set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}]
drh3e27c022004-07-23 00:01:38 +0000666 if {$n>1} {
drhcc071302013-07-17 18:12:15 +0000667 set idxlist [mem eval "SELECT name FROM space_used
668 WHERE tblname='$qn' AND is_index
669 ORDER BY 1"]
drh2f312ee2013-09-28 12:40:55 +0000670 subreport "Table $name and all its indices" "tblname='$qn'" 0
671 subreport "Table $name w/o any indices" "name='$qn'" 1
drhcc071302013-07-17 18:12:15 +0000672 if {[llength $idxlist]>1} {
drh2f312ee2013-09-28 12:40:55 +0000673 subreport "Indices of table $name" "tblname='$qn' AND is_index" 0
drhcc071302013-07-17 18:12:15 +0000674 }
675 foreach idx $idxlist {
676 set qidx [quote $idx]
drh2f312ee2013-09-28 12:40:55 +0000677 subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1
drhcc071302013-07-17 18:12:15 +0000678 }
drh3e27c022004-07-23 00:01:38 +0000679 } else {
drh2f312ee2013-09-28 12:40:55 +0000680 subreport "Table $name" "name='$qn'" 1
drh3e27c022004-07-23 00:01:38 +0000681 }
682}
683
684# Output instructions on what the numbers above mean.
685#
drhcc071302013-07-17 18:12:15 +0000686puts ""
687titleline Definitions
drh3e27c022004-07-23 00:01:38 +0000688puts {
drh3e27c022004-07-23 00:01:38 +0000689Page size in bytes
690
691 The number of bytes in a single page of the database file.
692 Usually 1024.
693
694Number of pages in the whole file
695}
drh4515a452011-08-31 17:46:50 +0000696puts " The number of $pageSize-byte pages that go into forming the complete
drh3e27c022004-07-23 00:01:38 +0000697 database"
drh4515a452011-08-31 17:46:50 +0000698puts {
drh3e27c022004-07-23 00:01:38 +0000699Pages that store data
700
701 The number of pages that store data, either as primary B*Tree pages or
702 as overflow pages. The number at the right is the data pages divided by
703 the total number of pages in the file.
704
705Pages on the freelist
706
707 The number of pages that are not currently in use but are reserved for
708 future use. The percentage at the right is the number of freelist pages
709 divided by the total number of pages in the file.
710
danielk197716254452004-11-08 16:15:09 +0000711Pages of auto-vacuum overhead
712
713 The number of pages that store data used by the database to facilitate
714 auto-vacuum. This is zero for databases that do not support auto-vacuum.
715
drh3e27c022004-07-23 00:01:38 +0000716Number of tables in the database
717
718 The number of tables in the database, including the SQLITE_MASTER table
719 used to store schema information.
720
721Number of indices
722
723 The total number of indices in the database.
724
drh2f312ee2013-09-28 12:40:55 +0000725Number of defined indices
drh3e27c022004-07-23 00:01:38 +0000726
727 The number of indices created using an explicit CREATE INDEX statement.
728
drh2f312ee2013-09-28 12:40:55 +0000729Number of implied indices
drh3e27c022004-07-23 00:01:38 +0000730
731 The number of indices used to implement PRIMARY KEY or UNIQUE constraints
732 on tables.
733
734Size of the file in bytes
735
736 The total amount of disk space used by the entire database files.
737
738Bytes of user payload stored
739
740 The total number of bytes of user payload stored in the database. The
741 schema information in the SQLITE_MASTER table is not counted when
742 computing this number. The percentage at the right shows the payload
743 divided by the total file size.
744
745Percentage of total database
746
747 The amount of the complete database file that is devoted to storing
748 information described by this category.
749
750Number of entries
751
752 The total number of B-Tree key/value pairs stored under this category.
753
754Bytes of storage consumed
755
756 The total amount of disk space required to store all B-Tree entries
757 under this category. The is the total number of pages used times
758 the pages size.
759
760Bytes of payload
761
762 The amount of payload stored under this category. Payload is the data
763 part of table entries and the key part of index entries. The percentage
764 at the right is the bytes of payload divided by the bytes of storage
765 consumed.
766
drhc74d9622017-06-15 00:52:03 +0000767Bytes of metadata
768
drhf217f3b2017-06-15 16:45:23 +0000769 The amount of formatting and structural information stored in the
drhc74d9622017-06-15 00:52:03 +0000770 table or index. Metadata includes the btree page header, the cell pointer
771 array, the size field for each cell, the left child pointer or non-leaf
772 cells, the overflow pointers for overflow cells, and the rowid value for
drhf217f3b2017-06-15 16:45:23 +0000773 rowid table cells. In other words, metadata is everything that is neither
774 unused space nor content. The record header in the payload is counted as
775 content, not metadata.
drhc74d9622017-06-15 00:52:03 +0000776
drh3e27c022004-07-23 00:01:38 +0000777Average payload per entry
778
779 The average amount of payload on each entry. This is just the bytes of
780 payload divided by the number of entries.
781
782Average unused bytes per entry
783
784 The average amount of free space remaining on all pages under this
785 category on a per-entry basis. This is the number of unused bytes on
786 all pages divided by the number of entries.
787
drh2f312ee2013-09-28 12:40:55 +0000788Non-sequential pages
drhfc6e0c92007-02-13 01:41:52 +0000789
drh2f312ee2013-09-28 12:40:55 +0000790 The number of pages in the table or index that are out of sequence.
791 Many filesystems are optimized for sequential file access so a small
792 number of non-sequential pages might result in faster queries,
793 especially for larger database files that do not fit in the disk cache.
794 Note that after running VACUUM, the root page of each table or index is
795 at the beginning of the database file and all other pages are in a
796 separate part of the database file, resulting in a single non-
797 sequential page.
drhfc6e0c92007-02-13 01:41:52 +0000798
drh3e27c022004-07-23 00:01:38 +0000799Maximum payload per entry
800
801 The largest payload size of any entry.
802
803Entries that use overflow
804
805 The number of entries that user one or more overflow pages.
806
807Total pages used
808
809 This is the number of pages used to hold all information in the current
810 category. This is the sum of index, primary, and overflow pages.
811
812Index pages used
813
814 This is the number of pages in a table B-tree that hold only key (rowid)
815 information and no data.
816
817Primary pages used
818
819 This is the number of B-tree pages that hold both key and data.
820
821Overflow pages used
822
823 The total number of overflow pages used for this category.
824
825Unused bytes on index pages
826
827 The total number of bytes of unused space on all index pages. The
828 percentage at the right is the number of unused bytes divided by the
829 total number of bytes on index pages.
830
831Unused bytes on primary pages
832
833 The total number of bytes of unused space on all primary pages. The
834 percentage at the right is the number of unused bytes divided by the
835 total number of bytes on primary pages.
836
837Unused bytes on overflow pages
838
839 The total number of bytes of unused space on all overflow pages. The
840 percentage at the right is the number of unused bytes divided by the
841 total number of bytes on overflow pages.
842
843Unused bytes on all pages
844
845 The total number of bytes of unused space on all primary and overflow
846 pages. The percentage at the right is the number of unused bytes
847 divided by the total number of bytes.
848}
849
danielk19770ba87cb2004-11-09 07:42:11 +0000850# Output a dump of the in-memory database. This can be used for more
851# complex offline analysis.
drh3e27c022004-07-23 00:01:38 +0000852#
drhcc071302013-07-17 18:12:15 +0000853titleline {}
drh3e27c022004-07-23 00:01:38 +0000854puts "The entire text of this report can be sourced into any SQL database"
855puts "engine for further analysis. All of the text above is an SQL comment."
856puts "The data used to generate this report follows:"
857puts "*/"
858puts "BEGIN;"
859puts $tabledef
860unset -nocomplain x
861mem eval {SELECT * FROM space_used} x {
862 puts -nonewline "INSERT INTO space_used VALUES"
863 set sep (
864 foreach col $x(*) {
865 set v $x($col)
drha4641712013-11-02 11:34:58 +0000866 if {$v=="" || ![string is double $v]} {set v '[quote $v]'}
drh3e27c022004-07-23 00:01:38 +0000867 puts -nonewline $sep$v
868 set sep ,
869 }
870 puts ");"
871}
872puts "COMMIT;"
drha7531c62006-01-24 02:19:53 +0000873
874} err]} {
875 puts "ERROR: $err"
876 puts $errorInfo
877 exit 1
878}