blob: a227b8524359c00b9e75f052c092d5e65ab0575f [file] [log] [blame]
drh3e27c022004-07-23 00:01:38 +00001# Run this TCL script using "testfixture" in order get a report that shows
2# how much disk space is used by a particular data to actually store data
3# versus how much space is unused.
4#
5
drha7531c62006-01-24 02:19:53 +00006if {[catch {
drh3e27c022004-07-23 00:01:38 +00007# Get the name of the database to analyze
8#
drhb7708942011-10-05 18:18:13 +00009proc usage {} {
10 set argv0 [file rootname [file tail [info nameofexecutable]]]
drh3e27c022004-07-23 00:01:38 +000011 puts stderr "Usage: $argv0 database-name"
12 exit 1
13}
drhb7708942011-10-05 18:18:13 +000014set file_to_analyze {}
15set flags(-pageinfo) 0
drha624fd52011-10-05 19:46:03 +000016set flags(-stats) 0
drhb7708942011-10-05 18:18:13 +000017append argv {}
18foreach arg $argv {
19 if {[regexp {^-+pageinfo$} $arg]} {
20 set flags(-pageinfo) 1
drha624fd52011-10-05 19:46:03 +000021 } elseif {[regexp {^-+stats$} $arg]} {
22 set flags(-stats) 1
drhb7708942011-10-05 18:18:13 +000023 } elseif {[regexp {^-} $arg]} {
24 puts stderr "Unknown option: $arg"
25 usage
26 } elseif {$file_to_analyze!=""} {
27 usage
28 } else {
29 set file_to_analyze $arg
30 }
31}
32if {$file_to_analyze==""} usage
drh453a3122012-10-10 10:52:46 +000033set root_filename $file_to_analyze
34regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename
35if {![file exists $root_filename]} {
36 puts stderr "No such file: $root_filename"
drh3e27c022004-07-23 00:01:38 +000037 exit 1
38}
drh453a3122012-10-10 10:52:46 +000039if {![file readable $root_filename]} {
40 puts stderr "File is not readable: $root_filename"
drh3e27c022004-07-23 00:01:38 +000041 exit 1
42}
drh453a3122012-10-10 10:52:46 +000043set true_file_size [file size $root_filename]
drhb7708942011-10-05 18:18:13 +000044if {$true_file_size<512} {
drh453a3122012-10-10 10:52:46 +000045 puts stderr "Empty or malformed database: $root_filename"
drh3e27c022004-07-23 00:01:38 +000046 exit 1
47}
48
drh36c06322011-10-10 16:06:35 +000049# Compute the total file size assuming test_multiplexor is being used.
50# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
51#
drh453a3122012-10-10 10:52:46 +000052set extension [file extension $root_filename]
53set pattern $root_filename
drh37002622012-04-06 00:09:27 +000054append pattern {[0-3][0-9][0-9]}
drh36c06322011-10-10 16:06:35 +000055foreach f [glob -nocomplain $pattern] {
56 incr true_file_size [file size $f]
57 set extension {}
58}
59if {[string length $extension]>=2 && [string length $extension]<=4} {
drh453a3122012-10-10 10:52:46 +000060 set pattern [file rootname $root_filename]
drh37002622012-04-06 00:09:27 +000061 append pattern {.[0-3][0-9][0-9]}
drh36c06322011-10-10 16:06:35 +000062 foreach f [glob -nocomplain $pattern] {
63 incr true_file_size [file size $f]
64 }
65}
66
drh3e27c022004-07-23 00:01:38 +000067# Open the database
68#
drh453a3122012-10-10 10:52:46 +000069if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} {
70 puts stderr "error trying to open $file_to_analyze: $msg"
71 exit 1
72}
dan599e9d22010-07-12 08:39:37 +000073register_dbstat_vtab db
74
drh565621a2011-09-21 20:10:42 +000075db eval {SELECT count(*) FROM sqlite_master}
drhf08f3842011-09-27 13:40:26 +000076set pageSize [expr {wide([db one {PRAGMA page_size}])}]
drh3e27c022004-07-23 00:01:38 +000077
drha624fd52011-10-05 19:46:03 +000078if {$flags(-pageinfo)} {
79 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
80 db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
81 puts "$pageno $name $path"
82 }
83 exit 0
84}
85if {$flags(-stats)} {
86 db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
87 puts "BEGIN;"
88 puts "CREATE TABLE stats("
89 puts " name STRING, /* Name of table or index */"
90 puts " path INTEGER, /* Path to page from root */"
91 puts " pageno INTEGER, /* Page number */"
92 puts " pagetype STRING, /* 'internal', 'leaf' or 'overflow' */"
93 puts " ncell INTEGER, /* Cells on page (0 for overflow) */"
94 puts " payload INTEGER, /* Bytes of payload on this page */"
95 puts " unused INTEGER, /* Bytes of unused space on this page */"
96 puts " mx_payload INTEGER, /* Largest payload size of all cells */"
97 puts " pgoffset INTEGER, /* Offset of page in file */"
98 puts " pgsize INTEGER /* Size of the page */"
99 puts ");"
100 db eval {SELECT quote(name) || ',' ||
101 quote(path) || ',' ||
102 quote(pageno) || ',' ||
103 quote(pagetype) || ',' ||
104 quote(ncell) || ',' ||
105 quote(payload) || ',' ||
106 quote(unused) || ',' ||
107 quote(mx_payload) || ',' ||
108 quote(pgoffset) || ',' ||
109 quote(pgsize) AS x FROM stat} {
110 puts "INSERT INTO stats VALUES($x);"
111 }
112 puts "COMMIT;"
113 exit 0
114}
115
danielk19770ba87cb2004-11-09 07:42:11 +0000116# In-memory database for collecting statistics. This script loops through
117# the tables and indices in the database being analyzed, adding a row for each
118# to an in-memory database (for which the schema is shown below). It then
119# queries the in-memory db to produce the space-analysis report.
drh3e27c022004-07-23 00:01:38 +0000120#
121sqlite3 mem :memory:
drh4515a452011-08-31 17:46:50 +0000122set tabledef {CREATE TABLE space_used(
drh3e27c022004-07-23 00:01:38 +0000123 name clob, -- Name of a table or index in the database file
124 tblname clob, -- Name of associated table
125 is_index boolean, -- TRUE if it is an index, false for a table
126 nentry int, -- Number of entries in the BTree
127 leaf_entries int, -- Number of leaf entries
128 payload int, -- Total amount of data stored in this table or index
129 ovfl_payload int, -- Total amount of data stored on overflow pages
130 ovfl_cnt int, -- Number of entries that use overflow
131 mx_payload int, -- Maximum payload size
132 int_pages int, -- Number of interior pages used
133 leaf_pages int, -- Number of leaf pages used
134 ovfl_pages int, -- Number of overflow pages used
135 int_unused int, -- Number of unused bytes on interior pages
136 leaf_unused int, -- Number of unused bytes on primary pages
drh50c67062007-02-10 19:22:35 +0000137 ovfl_unused int, -- Number of unused bytes on overflow pages
drh4c9f1292011-09-28 00:50:14 +0000138 gap_cnt int, -- Number of gaps in the page layout
139 compressed_size int -- Total bytes stored on disk
drh3e27c022004-07-23 00:01:38 +0000140);}
141mem eval $tabledef
142
dan599e9d22010-07-12 08:39:37 +0000143# Create a temporary "dbstat" virtual table.
144#
drh565621a2011-09-21 20:10:42 +0000145db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
146db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
147 ORDER BY name, path}
148db eval {DROP TABLE temp.stat}
dan599e9d22010-07-12 08:39:37 +0000149
150proc isleaf {pagetype is_index} {
151 return [expr {$pagetype == "leaf" || ($pagetype == "internal" && $is_index)}]
152}
153proc isoverflow {pagetype is_index} {
154 return [expr {$pagetype == "overflow"}]
155}
156proc isinternal {pagetype is_index} {
157 return [expr {$pagetype == "internal" && $is_index==0}]
158}
159
160db func isleaf isleaf
161db func isinternal isinternal
162db func isoverflow isoverflow
163
drhb7708942011-10-05 18:18:13 +0000164set isCompressed 0
165set compressOverhead 0
dan599e9d22010-07-12 08:39:37 +0000166set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
167foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {
168
169 set is_index [expr {$name!=$tblname}]
170 db eval {
171 SELECT
172 sum(ncell) AS nentry,
173 sum(isleaf(pagetype, $is_index) * ncell) AS leaf_entries,
174 sum(payload) AS payload,
175 sum(isoverflow(pagetype, $is_index) * payload) AS ovfl_payload,
176 sum(path LIKE '%+000000') AS ovfl_cnt,
177 max(mx_payload) AS mx_payload,
178 sum(isinternal(pagetype, $is_index)) AS int_pages,
179 sum(isleaf(pagetype, $is_index)) AS leaf_pages,
180 sum(isoverflow(pagetype, $is_index)) AS ovfl_pages,
181 sum(isinternal(pagetype, $is_index) * unused) AS int_unused,
182 sum(isleaf(pagetype, $is_index) * unused) AS leaf_unused,
drh4c9f1292011-09-28 00:50:14 +0000183 sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused,
184 sum(pgsize) AS compressed_size
dan599e9d22010-07-12 08:39:37 +0000185 FROM temp.dbstat WHERE name = $name
186 } break
187
drhb7708942011-10-05 18:18:13 +0000188 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
189 set storage [expr {$total_pages*$pageSize}]
190 if {!$isCompressed && $storage>$compressed_size} {
191 set isCompressed 1
192 set compressOverhead 14
193 }
194
dan599e9d22010-07-12 08:39:37 +0000195 # Column 'gap_cnt' is set to the number of non-contiguous entries in the
196 # list of pages visited if the b-tree structure is traversed in a top-down
197 # fashion (each node visited before its child-tree is passed). Any overflow
198 # chains present are traversed from start to finish before any child-tree
199 # is.
200 #
201 set gap_cnt 0
drh2f312ee2013-09-28 12:40:55 +0000202 set prev 0
203 db eval {
204 SELECT pageno, pagetype FROM temp.dbstat
205 WHERE name=$name
206 ORDER BY pageno
207 } {
208 if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} {
209 incr gap_cnt
210 }
211 set prev $pageno
dan599e9d22010-07-12 08:39:37 +0000212 }
dan599e9d22010-07-12 08:39:37 +0000213 mem eval {
214 INSERT INTO space_used VALUES(
215 $name,
216 $tblname,
217 $is_index,
218 $nentry,
219 $leaf_entries,
220 $payload,
221 $ovfl_payload,
222 $ovfl_cnt,
223 $mx_payload,
224 $int_pages,
225 $leaf_pages,
226 $ovfl_pages,
227 $int_unused,
228 $leaf_unused,
229 $ovfl_unused,
drh4c9f1292011-09-28 00:50:14 +0000230 $gap_cnt,
231 $compressed_size
dan599e9d22010-07-12 08:39:37 +0000232 );
233 }
234}
235
danielk197724c92532005-02-01 10:36:40 +0000236proc integerify {real} {
drh03496882007-12-04 13:41:51 +0000237 if {[string is double -strict $real]} {
drhf08f3842011-09-27 13:40:26 +0000238 return [expr {wide($real)}]
drh03496882007-12-04 13:41:51 +0000239 } else {
240 return 0
241 }
danielk197724c92532005-02-01 10:36:40 +0000242}
243mem function int integerify
244
danielk19770ba87cb2004-11-09 07:42:11 +0000245# Quote a string for use in an SQL query. Examples:
drh3e27c022004-07-23 00:01:38 +0000246#
danielk19770ba87cb2004-11-09 07:42:11 +0000247# [quote {hello world}] == {'hello world'}
248# [quote {hello world's}] == {'hello world''s'}
drh3e27c022004-07-23 00:01:38 +0000249#
danielk19770ba87cb2004-11-09 07:42:11 +0000250proc quote {txt} {
drhcc071302013-07-17 18:12:15 +0000251 return [string map {' ''} $txt]
252}
253
254# Output a title line
255#
256proc titleline {title} {
257 if {$title==""} {
258 puts [string repeat * 79]
259 } else {
260 set len [string length $title]
261 set stars [string repeat * [expr 79-$len-5]]
262 puts "*** $title $stars"
263 }
drh3e27c022004-07-23 00:01:38 +0000264}
265
drh3e27c022004-07-23 00:01:38 +0000266# Generate a single line of output in the statistics section of the
267# report.
268#
269proc statline {title value {extra {}}} {
270 set len [string length $title]
drhcc071302013-07-17 18:12:15 +0000271 set dots [string repeat . [expr 50-$len]]
drh3e27c022004-07-23 00:01:38 +0000272 set len [string length $value]
273 set sp2 [string range { } $len end]
274 if {$extra ne ""} {
275 set extra " $extra"
276 }
277 puts "$title$dots $value$sp2$extra"
278}
279
280# Generate a formatted percentage value for $num/$denom
281#
282proc percent {num denom {of {}}} {
283 if {$denom==0.0} {return ""}
284 set v [expr {$num*100.0/$denom}]
285 set of {}
drhfaf60c72005-03-29 13:18:16 +0000286 if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
drh3e27c022004-07-23 00:01:38 +0000287 return [format {%5.1f%% %s} $v $of]
288 } elseif {$v<0.1 || $v>99.9} {
289 return [format {%7.3f%% %s} $v $of]
290 } else {
291 return [format {%6.2f%% %s} $v $of]
292 }
293}
294
danielk197724c92532005-02-01 10:36:40 +0000295proc divide {num denom} {
296 if {$denom==0} {return 0.0}
297 return [format %.2f [expr double($num)/double($denom)]]
298}
299
drh3e27c022004-07-23 00:01:38 +0000300# Generate a subreport that covers some subset of the database.
301# the $where clause determines which subset to analyze.
302#
drh2f312ee2013-09-28 12:40:55 +0000303proc subreport {title where showFrag} {
drhb7708942011-10-05 18:18:13 +0000304 global pageSize file_pgcnt compressOverhead
danielk19770ba87cb2004-11-09 07:42:11 +0000305
306 # Query the in-memory database for the sum of various statistics
307 # for the subset of tables/indices identified by the WHERE clause in
308 # $where. Note that even if the WHERE clause matches no rows, the
309 # following query returns exactly one row (because it is an aggregate).
310 #
311 # The results of the query are stored directly by SQLite into local
312 # variables (i.e. $nentry, $nleaf etc.).
313 #
drh3e27c022004-07-23 00:01:38 +0000314 mem eval "
315 SELECT
danielk197724c92532005-02-01 10:36:40 +0000316 int(sum(nentry)) AS nentry,
317 int(sum(leaf_entries)) AS nleaf,
318 int(sum(payload)) AS payload,
319 int(sum(ovfl_payload)) AS ovfl_payload,
drh3e27c022004-07-23 00:01:38 +0000320 max(mx_payload) AS mx_payload,
danielk197724c92532005-02-01 10:36:40 +0000321 int(sum(ovfl_cnt)) as ovfl_cnt,
322 int(sum(leaf_pages)) AS leaf_pages,
323 int(sum(int_pages)) AS int_pages,
324 int(sum(ovfl_pages)) AS ovfl_pages,
325 int(sum(leaf_unused)) AS leaf_unused,
326 int(sum(int_unused)) AS int_unused,
drh50c67062007-02-10 19:22:35 +0000327 int(sum(ovfl_unused)) AS ovfl_unused,
drh4c9f1292011-09-28 00:50:14 +0000328 int(sum(gap_cnt)) AS gap_cnt,
329 int(sum(compressed_size)) AS compressed_size
danielk19770ba87cb2004-11-09 07:42:11 +0000330 FROM space_used WHERE $where" {} {}
331
332 # Output the sub-report title, nicely decorated with * characters.
333 #
drh3e27c022004-07-23 00:01:38 +0000334 puts ""
drhcc071302013-07-17 18:12:15 +0000335 titleline $title
drh3e27c022004-07-23 00:01:38 +0000336 puts ""
danielk19770ba87cb2004-11-09 07:42:11 +0000337
338 # Calculate statistics and store the results in TCL variables, as follows:
339 #
340 # total_pages: Database pages consumed.
341 # total_pages_percent: Pages consumed as a percentage of the file.
342 # storage: Bytes consumed.
343 # payload_percent: Payload bytes used as a percentage of $storage.
344 # total_unused: Unused bytes on pages.
345 # avg_payload: Average payload per btree entry.
346 # avg_fanout: Average fanout for internal pages.
347 # avg_unused: Average unused bytes per btree entry.
348 # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
349 #
drh3e27c022004-07-23 00:01:38 +0000350 set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
danielk19770ba87cb2004-11-09 07:42:11 +0000351 set total_pages_percent [percent $total_pages $file_pgcnt]
drh3e27c022004-07-23 00:01:38 +0000352 set storage [expr {$total_pages*$pageSize}]
danielk19770ba87cb2004-11-09 07:42:11 +0000353 set payload_percent [percent $payload $storage {of storage consumed}]
354 set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
danielk197724c92532005-02-01 10:36:40 +0000355 set avg_payload [divide $payload $nleaf]
356 set avg_unused [divide $total_unused $nleaf]
drh3e27c022004-07-23 00:01:38 +0000357 if {$int_pages>0} {
danielk19770ba87cb2004-11-09 07:42:11 +0000358 # TODO: Is this formula correct?
danielk197724c92532005-02-01 10:36:40 +0000359 set nTab [mem eval "
360 SELECT count(*) FROM (
361 SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
362 )
363 "]
364 set avg_fanout [mem eval "
365 SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
366 WHERE $where AND is_index = 0
367 "]
368 set avg_fanout [format %.2f $avg_fanout]
drh3e27c022004-07-23 00:01:38 +0000369 }
danielk19770ba87cb2004-11-09 07:42:11 +0000370 set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}]
371
372 # Print out the sub-report statistics.
373 #
374 statline {Percentage of total database} $total_pages_percent
375 statline {Number of entries} $nleaf
376 statline {Bytes of storage consumed} $storage
drh4c9f1292011-09-28 00:50:14 +0000377 if {$compressed_size!=$storage} {
drhb7708942011-10-05 18:18:13 +0000378 set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
drh4c9f1292011-09-28 00:50:14 +0000379 set pct [expr {$compressed_size*100.0/$storage}]
380 set pct [format {%5.1f%%} $pct]
381 statline {Bytes used after compression} $compressed_size $pct
382 }
danielk19770ba87cb2004-11-09 07:42:11 +0000383 statline {Bytes of payload} $payload $payload_percent
384 statline {Average payload per entry} $avg_payload
385 statline {Average unused bytes per entry} $avg_unused
386 if {[info exists avg_fanout]} {
387 statline {Average fanout} $avg_fanout
388 }
drh2f312ee2013-09-28 12:40:55 +0000389 if {$showFrag && $total_pages>1} {
390 set fragmentation [percent $gap_cnt [expr {$total_pages-1}]]
391 statline {Non-sequential pages} $gap_cnt $fragmentation
drh50c67062007-02-10 19:22:35 +0000392 }
danielk19770ba87cb2004-11-09 07:42:11 +0000393 statline {Maximum payload per entry} $mx_payload
394 statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
drh3e27c022004-07-23 00:01:38 +0000395 if {$int_pages>0} {
danielk19770ba87cb2004-11-09 07:42:11 +0000396 statline {Index pages used} $int_pages
drh3e27c022004-07-23 00:01:38 +0000397 }
danielk19770ba87cb2004-11-09 07:42:11 +0000398 statline {Primary pages used} $leaf_pages
399 statline {Overflow pages used} $ovfl_pages
400 statline {Total pages used} $total_pages
drh3e27c022004-07-23 00:01:38 +0000401 if {$int_unused>0} {
drh4515a452011-08-31 17:46:50 +0000402 set int_unused_percent [
403 percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
danielk19770ba87cb2004-11-09 07:42:11 +0000404 statline "Unused bytes on index pages" $int_unused $int_unused_percent
drh3e27c022004-07-23 00:01:38 +0000405 }
drh4515a452011-08-31 17:46:50 +0000406 statline "Unused bytes on primary pages" $leaf_unused [
407 percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
408 statline "Unused bytes on overflow pages" $ovfl_unused [
409 percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
410 statline "Unused bytes on all pages" $total_unused [
411 percent $total_unused $storage {of all space}]
drh3e27c022004-07-23 00:01:38 +0000412 return 1
413}
414
danielk197716254452004-11-08 16:15:09 +0000415# Calculate the overhead in pages caused by auto-vacuum.
416#
417# This procedure calculates and returns the number of pages used by the
418# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
419# then 0 is returned. The two arguments are the size of the database file in
danielk19770ba87cb2004-11-09 07:42:11 +0000420# pages and the page size used by the database (in bytes).
danielk197716254452004-11-08 16:15:09 +0000421proc autovacuum_overhead {filePages pageSize} {
422
dan599e9d22010-07-12 08:39:37 +0000423 # Set $autovacuum to non-zero for databases that support auto-vacuum.
424 set autovacuum [db one {PRAGMA auto_vacuum}]
danielk197716254452004-11-08 16:15:09 +0000425
426 # If the database is not an auto-vacuum database or the file consists
427 # of one page only then there is no overhead for auto-vacuum. Return zero.
dan599e9d22010-07-12 08:39:37 +0000428 if {0==$autovacuum || $filePages==1} {
danielk197716254452004-11-08 16:15:09 +0000429 return 0
430 }
431
432 # The number of entries on each pointer map page. The layout of the
433 # database file is one pointer-map page, followed by $ptrsPerPage other
434 # pages, followed by a pointer-map page etc. The first pointer-map page
435 # is the second page of the file overall.
436 set ptrsPerPage [expr double($pageSize/5)]
437
438 # Return the number of pointer map pages in the database.
drhf08f3842011-09-27 13:40:26 +0000439 return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
danielk197716254452004-11-08 16:15:09 +0000440}
441
danielk197716254452004-11-08 16:15:09 +0000442
danielk19770ba87cb2004-11-09 07:42:11 +0000443# Calculate the summary statistics for the database and store the results
444# in TCL variables. They are output below. Variables are as follows:
danielk197716254452004-11-08 16:15:09 +0000445#
446# pageSize: Size of each page in bytes.
447# file_bytes: File size in bytes.
448# file_pgcnt: Number of pages in the file.
449# file_pgcnt2: Number of pages in the file (calculated).
450# av_pgcnt: Pages consumed by the auto-vacuum pointer-map.
451# av_percent: Percentage of the file consumed by auto-vacuum pointer-map.
452# inuse_pgcnt: Data pages in the file.
453# inuse_percent: Percentage of pages used to store data.
454# free_pgcnt: Free pages calculated as (<total pages> - <in-use pages>)
455# free_pgcnt2: Free pages in the file according to the file header.
456# free_percent: Percentage of file consumed by free pages (calculated).
457# free_percent2: Percentage of file consumed by free pages (header).
458# ntable: Number of tables in the db.
459# nindex: Number of indices in the db.
460# nautoindex: Number of indices created automatically.
461# nmanindex: Number of indices created manually.
danielk19770ba87cb2004-11-09 07:42:11 +0000462# user_payload: Number of bytes of payload in table btrees
463# (not including sqlite_master)
464# user_percent: $user_payload as a percentage of total file size.
danielk197716254452004-11-08 16:15:09 +0000465
dan64b41c72011-09-26 19:32:47 +0000466### The following, setting $file_bytes based on the actual size of the file
467### on disk, causes this tool to choke on zipvfs databases. So set it based
468### on the return of [PRAGMA page_count] instead.
469if 0 {
470 set file_bytes [file size $file_to_analyze]
471 set file_pgcnt [expr {$file_bytes/$pageSize}]
472}
473set file_pgcnt [db one {PRAGMA page_count}]
drhf08f3842011-09-27 13:40:26 +0000474set file_bytes [expr {$file_pgcnt * $pageSize}]
danielk197716254452004-11-08 16:15:09 +0000475
476set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize]
477set av_percent [percent $av_pgcnt $file_pgcnt]
478
danielk19770ba87cb2004-11-09 07:42:11 +0000479set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
drhf08f3842011-09-27 13:40:26 +0000480set inuse_pgcnt [expr wide([mem eval $sql])]
danielk197716254452004-11-08 16:15:09 +0000481set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
482
drhf08f3842011-09-27 13:40:26 +0000483set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
danielk197716254452004-11-08 16:15:09 +0000484set free_percent [percent $free_pgcnt $file_pgcnt]
dan599e9d22010-07-12 08:39:37 +0000485set free_pgcnt2 [db one {PRAGMA freelist_count}]
danielk197716254452004-11-08 16:15:09 +0000486set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
487
488set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
drh3e27c022004-07-23 00:01:38 +0000489
490set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
drh3e27c022004-07-23 00:01:38 +0000491set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
danielk19770ba87cb2004-11-09 07:42:11 +0000492set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
493set nautoindex [db eval $sql]
danielk197716254452004-11-08 16:15:09 +0000494set nmanindex [expr {$nindex-$nautoindex}]
495
496# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
danielk197724c92532005-02-01 10:36:40 +0000497set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
drh3e27c022004-07-23 00:01:38 +0000498 WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
danielk19770ba87cb2004-11-09 07:42:11 +0000499set user_percent [percent $user_payload $file_bytes]
danielk197716254452004-11-08 16:15:09 +0000500
danielk19770ba87cb2004-11-09 07:42:11 +0000501# Output the summary statistics calculated above.
502#
drh453a3122012-10-10 10:52:46 +0000503puts "/** Disk-Space Utilization Report For $root_filename"
danielk19770ba87cb2004-11-09 07:42:11 +0000504puts ""
danielk197716254452004-11-08 16:15:09 +0000505statline {Page size in bytes} $pageSize
506statline {Pages in the whole file (measured)} $file_pgcnt
507statline {Pages in the whole file (calculated)} $file_pgcnt2
508statline {Pages that store data} $inuse_pgcnt $inuse_percent
509statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
510statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
511statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
512statline {Number of tables in the database} $ntable
513statline {Number of indices} $nindex
drh2f312ee2013-09-28 12:40:55 +0000514statline {Number of defined indices} $nmanindex
515statline {Number of implied indices} $nautoindex
drhb7708942011-10-05 18:18:13 +0000516if {$isCompressed} {
517 statline {Size of uncompressed content in bytes} $file_bytes
518 set efficiency [percent $true_file_size $file_bytes]
519 statline {Size of compressed file on disk} $true_file_size $efficiency
520} else {
521 statline {Size of the file in bytes} $file_bytes
522}
danielk19770ba87cb2004-11-09 07:42:11 +0000523statline {Bytes of user payload stored} $user_payload $user_percent
drh3e27c022004-07-23 00:01:38 +0000524
525# Output table rankings
526#
527puts ""
drhcc071302013-07-17 18:12:15 +0000528titleline "Page counts for all tables with their indices"
drh3e27c022004-07-23 00:01:38 +0000529puts ""
530mem eval {SELECT tblname, count(*) AS cnt,
danielk197724c92532005-02-01 10:36:40 +0000531 int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
drhfaf60c72005-03-29 13:18:16 +0000532 FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
drh3e27c022004-07-23 00:01:38 +0000533 statline [string toupper $tblname] $size [percent $size $file_pgcnt]
534}
drhcc071302013-07-17 18:12:15 +0000535puts ""
536titleline "Page counts for all tables and indices separately"
537puts ""
538mem eval {
539 SELECT
540 upper(name) AS nm,
541 int(int_pages+leaf_pages+ovfl_pages) AS size
542 FROM space_used
543 ORDER BY size+0 DESC, name} {} {
544 statline $nm $size [percent $size $file_pgcnt]
545}
drhb7708942011-10-05 18:18:13 +0000546if {$isCompressed} {
547 puts ""
drhcc071302013-07-17 18:12:15 +0000548 titleline "Bytes of disk space used after compression"
drhb7708942011-10-05 18:18:13 +0000549 puts ""
550 set csum 0
551 mem eval {SELECT tblname,
552 int(sum(compressed_size)) +
553 $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
554 AS csize
555 FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
556 incr csum $csize
557 statline [string toupper $tblname] $csize [percent $csize $true_file_size]
558 }
559 set overhead [expr {$true_file_size - $csum}]
560 if {$overhead>0} {
561 statline {Header and free space} $overhead [percent $overhead $true_file_size]
562 }
563}
drh3e27c022004-07-23 00:01:38 +0000564
565# Output subreports
566#
567if {$nindex>0} {
drh2f312ee2013-09-28 12:40:55 +0000568 subreport {All tables and indices} 1 0
drh3e27c022004-07-23 00:01:38 +0000569}
drh2f312ee2013-09-28 12:40:55 +0000570subreport {All tables} {NOT is_index} 0
drh3e27c022004-07-23 00:01:38 +0000571if {$nindex>0} {
drh2f312ee2013-09-28 12:40:55 +0000572 subreport {All indices} {is_index} 0
drh3e27c022004-07-23 00:01:38 +0000573}
drh7913e412013-11-01 20:30:36 +0000574foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used
drh3e27c022004-07-23 00:01:38 +0000575 ORDER BY name}] {
drhcc071302013-07-17 18:12:15 +0000576 set qn [quote $tbl]
drh3e27c022004-07-23 00:01:38 +0000577 set name [string toupper $tbl]
drhcc071302013-07-17 18:12:15 +0000578 set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}]
drh3e27c022004-07-23 00:01:38 +0000579 if {$n>1} {
drhcc071302013-07-17 18:12:15 +0000580 set idxlist [mem eval "SELECT name FROM space_used
581 WHERE tblname='$qn' AND is_index
582 ORDER BY 1"]
drh2f312ee2013-09-28 12:40:55 +0000583 subreport "Table $name and all its indices" "tblname='$qn'" 0
584 subreport "Table $name w/o any indices" "name='$qn'" 1
drhcc071302013-07-17 18:12:15 +0000585 if {[llength $idxlist]>1} {
drh2f312ee2013-09-28 12:40:55 +0000586 subreport "Indices of table $name" "tblname='$qn' AND is_index" 0
drhcc071302013-07-17 18:12:15 +0000587 }
588 foreach idx $idxlist {
589 set qidx [quote $idx]
drh2f312ee2013-09-28 12:40:55 +0000590 subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1
drhcc071302013-07-17 18:12:15 +0000591 }
drh3e27c022004-07-23 00:01:38 +0000592 } else {
drh2f312ee2013-09-28 12:40:55 +0000593 subreport "Table $name" "name='$qn'" 1
drh3e27c022004-07-23 00:01:38 +0000594 }
595}
596
597# Output instructions on what the numbers above mean.
598#
drhcc071302013-07-17 18:12:15 +0000599puts ""
600titleline Definitions
drh3e27c022004-07-23 00:01:38 +0000601puts {
drh3e27c022004-07-23 00:01:38 +0000602Page size in bytes
603
604 The number of bytes in a single page of the database file.
605 Usually 1024.
606
607Number of pages in the whole file
608}
drh4515a452011-08-31 17:46:50 +0000609puts " The number of $pageSize-byte pages that go into forming the complete
drh3e27c022004-07-23 00:01:38 +0000610 database"
drh4515a452011-08-31 17:46:50 +0000611puts {
drh3e27c022004-07-23 00:01:38 +0000612Pages that store data
613
614 The number of pages that store data, either as primary B*Tree pages or
615 as overflow pages. The number at the right is the data pages divided by
616 the total number of pages in the file.
617
618Pages on the freelist
619
620 The number of pages that are not currently in use but are reserved for
621 future use. The percentage at the right is the number of freelist pages
622 divided by the total number of pages in the file.
623
danielk197716254452004-11-08 16:15:09 +0000624Pages of auto-vacuum overhead
625
626 The number of pages that store data used by the database to facilitate
627 auto-vacuum. This is zero for databases that do not support auto-vacuum.
628
drh3e27c022004-07-23 00:01:38 +0000629Number of tables in the database
630
631 The number of tables in the database, including the SQLITE_MASTER table
632 used to store schema information.
633
634Number of indices
635
636 The total number of indices in the database.
637
drh2f312ee2013-09-28 12:40:55 +0000638Number of defined indices
drh3e27c022004-07-23 00:01:38 +0000639
640 The number of indices created using an explicit CREATE INDEX statement.
641
drh2f312ee2013-09-28 12:40:55 +0000642Number of implied indices
drh3e27c022004-07-23 00:01:38 +0000643
644 The number of indices used to implement PRIMARY KEY or UNIQUE constraints
645 on tables.
646
647Size of the file in bytes
648
649 The total amount of disk space used by the entire database files.
650
651Bytes of user payload stored
652
653 The total number of bytes of user payload stored in the database. The
654 schema information in the SQLITE_MASTER table is not counted when
655 computing this number. The percentage at the right shows the payload
656 divided by the total file size.
657
658Percentage of total database
659
660 The amount of the complete database file that is devoted to storing
661 information described by this category.
662
663Number of entries
664
665 The total number of B-Tree key/value pairs stored under this category.
666
667Bytes of storage consumed
668
669 The total amount of disk space required to store all B-Tree entries
670 under this category. The is the total number of pages used times
671 the pages size.
672
673Bytes of payload
674
675 The amount of payload stored under this category. Payload is the data
676 part of table entries and the key part of index entries. The percentage
677 at the right is the bytes of payload divided by the bytes of storage
678 consumed.
679
680Average payload per entry
681
682 The average amount of payload on each entry. This is just the bytes of
683 payload divided by the number of entries.
684
685Average unused bytes per entry
686
687 The average amount of free space remaining on all pages under this
688 category on a per-entry basis. This is the number of unused bytes on
689 all pages divided by the number of entries.
690
drh2f312ee2013-09-28 12:40:55 +0000691Non-sequential pages
drhfc6e0c92007-02-13 01:41:52 +0000692
drh2f312ee2013-09-28 12:40:55 +0000693 The number of pages in the table or index that are out of sequence.
694 Many filesystems are optimized for sequential file access so a small
695 number of non-sequential pages might result in faster queries,
696 especially for larger database files that do not fit in the disk cache.
697 Note that after running VACUUM, the root page of each table or index is
698 at the beginning of the database file and all other pages are in a
699 separate part of the database file, resulting in a single non-
700 sequential page.
drhfc6e0c92007-02-13 01:41:52 +0000701
drh3e27c022004-07-23 00:01:38 +0000702Maximum payload per entry
703
704 The largest payload size of any entry.
705
706Entries that use overflow
707
708 The number of entries that user one or more overflow pages.
709
710Total pages used
711
712 This is the number of pages used to hold all information in the current
713 category. This is the sum of index, primary, and overflow pages.
714
715Index pages used
716
717 This is the number of pages in a table B-tree that hold only key (rowid)
718 information and no data.
719
720Primary pages used
721
722 This is the number of B-tree pages that hold both key and data.
723
724Overflow pages used
725
726 The total number of overflow pages used for this category.
727
728Unused bytes on index pages
729
730 The total number of bytes of unused space on all index pages. The
731 percentage at the right is the number of unused bytes divided by the
732 total number of bytes on index pages.
733
734Unused bytes on primary pages
735
736 The total number of bytes of unused space on all primary pages. The
737 percentage at the right is the number of unused bytes divided by the
738 total number of bytes on primary pages.
739
740Unused bytes on overflow pages
741
742 The total number of bytes of unused space on all overflow pages. The
743 percentage at the right is the number of unused bytes divided by the
744 total number of bytes on overflow pages.
745
746Unused bytes on all pages
747
748 The total number of bytes of unused space on all primary and overflow
749 pages. The percentage at the right is the number of unused bytes
750 divided by the total number of bytes.
751}
752
danielk19770ba87cb2004-11-09 07:42:11 +0000753# Output a dump of the in-memory database. This can be used for more
754# complex offline analysis.
drh3e27c022004-07-23 00:01:38 +0000755#
drhcc071302013-07-17 18:12:15 +0000756titleline {}
drh3e27c022004-07-23 00:01:38 +0000757puts "The entire text of this report can be sourced into any SQL database"
758puts "engine for further analysis. All of the text above is an SQL comment."
759puts "The data used to generate this report follows:"
760puts "*/"
761puts "BEGIN;"
762puts $tabledef
763unset -nocomplain x
764mem eval {SELECT * FROM space_used} x {
765 puts -nonewline "INSERT INTO space_used VALUES"
766 set sep (
767 foreach col $x(*) {
768 set v $x($col)
drha4641712013-11-02 11:34:58 +0000769 if {$v=="" || ![string is double $v]} {set v '[quote $v]'}
drh3e27c022004-07-23 00:01:38 +0000770 puts -nonewline $sep$v
771 set sep ,
772 }
773 puts ");"
774}
775puts "COMMIT;"
drha7531c62006-01-24 02:19:53 +0000776
777} err]} {
778 puts "ERROR: $err"
779 puts $errorInfo
780 exit 1
781}