#!/usr/bin/python
#
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import fileinput
import operator
import optparse
import os
import pprint
import re
import subprocess
import sys
import json

def format_bytes(bytes):
  """Pretty-print a number of bytes."""
  if bytes > 1e6:
    bytes = bytes / 1.0e6
    return '%.1fm' % bytes
  if bytes > 1e3:
    bytes = bytes / 1.0e3
    return '%.1fk' % bytes
  return str(bytes)


def symbol_type_to_human(type):
  """Convert a symbol type as printed by nm into a human-readable name."""
  return {
      'b': 'bss',
      'd': 'data',
      'r': 'read-only data',
      't': 'code',
      'u': 'weak symbol',  # Unique global.
      'w': 'weak symbol',
      'v': 'weak symbol'
      }[type]


def parse_nm(input):
  """Parse nm output.

  Argument: an iterable over lines of nm output.

  Yields: (symbol name, symbol type, symbol size, source file path).
  Path may be None if nm couldn't figure out the source file.
  """

  # Match lines with size + symbol + optional filename.
  sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')

  # Match lines with addr but no size.
  addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
  # Match lines that don't have an address at all -- typically external symbols.
  noaddr_re = re.compile(r'^ + (.) (.*)$')

  for line in input:
    line = line.rstrip()
    match = sym_re.match(line)
    if match:
      size, type, sym = match.groups()[0:3]
      size = int(size, 16)
      type = type.lower()
      if type in ['u', 'v']:
        type = 'w'  # just call them all weak
      if type == 'b':
        continue  # skip all BSS for now
      path = match.group(4)
      yield sym, type, size, path
      continue
    match = addr_re.match(line)
    if match:
      type, sym = match.groups()[0:2]
      # No size == we don't care.
      continue
    match = noaddr_re.match(line)
    if match:
      type, sym = match.groups()
      if type in ('U', 'w'):
        # external or weak symbol
        continue

    print >>sys.stderr, 'unparsed:', repr(line)
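
# Illustrative only (the line below is made up, not taken from a real binary):
# an `nm -C -S -l` line such as
#   00000000004004f0 000000000000002a T main<tab>/src/hello.cc:12
# (where <tab> stands for a literal tab) matches sym_re above, and parse_nm
# would yield ('main', 't', 42, '/src/hello.cc') after lowercasing the type
# and parsing the hex size.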

def demangle(ident, cppfilt):
  if cppfilt and ident.startswith('_Z'):
    # Demangle names when possible. Mangled names all start with _Z.
    ident = subprocess.check_output([cppfilt, ident]).strip()
  return ident


class Suffix:
  def __init__(self, suffix, replacement):
    self.pattern = '^(.*)' + suffix + '(.*)$'
    self.re = re.compile(self.pattern)
    self.replacement = replacement

class SuffixCleanup:
  """Pre-compile suffix regular expressions."""
  def __init__(self):
    self.suffixes = [
        Suffix('\.part\.([0-9]+)', 'part'),
        Suffix('\.constprop\.([0-9]+)', 'constprop'),
        Suffix('\.isra\.([0-9]+)', 'isra'),
        ]
  def cleanup(self, ident, cppfilt):
    """Cleanup identifiers that have suffixes preventing demangling,
       and demangle if possible."""
    to_append = []
    for s in self.suffixes:
      found = s.re.match(ident)
      if not found:
        continue
      to_append += [' [' + s.replacement + '.' + found.group(2) + ']']
      ident = found.group(1) + found.group(3)
    if len(to_append) > 0:
      # Only try to demangle if there were suffixes.
      ident = demangle(ident, cppfilt)
      for s in to_append:
        ident += s
    return ident
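
# Illustrative only: SuffixCleanup().cleanup('sym.part.3', None) returns
# 'sym [part.3]'; when the stripped identifier is a mangled name and a
# working c++filt path is passed, it is demangled before the tag is re-added.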

suffix_cleanup = SuffixCleanup()

def parse_cpp_name(name, cppfilt):
  name = suffix_cleanup.cleanup(name, cppfilt)

  # Turn prefixes into suffixes so namespacing works.
  prefixes = [
      ['bool ', ''],
      ['construction vtable for ', ' [construction vtable]'],
      ['global constructors keyed to ', ' [global constructors]'],
      ['guard variable for ', ' [guard variable]'],
      ['int ', ''],
      ['non-virtual thunk to ', ' [non-virtual thunk]'],
      ['typeinfo for ', ' [typeinfo]'],
      ['typeinfo name for ', ' [typeinfo name]'],
      ['virtual thunk to ', ' [virtual thunk]'],
      ['void ', ''],
      ['vtable for ', ' [vtable]'],
      ['VTT for ', ' [VTT]'],
      ]
  for prefix, replacement in prefixes:
    if name.startswith(prefix):
      name = name[len(prefix):] + replacement
  # Simplify parenthesis parsing.
  replacements = [
      ['(anonymous namespace)', '[anonymous namespace]'],
      ]
  for value, replacement in replacements:
    name = name.replace(value, replacement)

  def parse_one(val):
    """Returns (leftmost-part, remaining)."""
    if (val.startswith('operator') and
        not (val[8].isalnum() or val[8] == '_')):
      # Operator overload function, terminate.
      return (val, '')
    co = val.find('::')
    lt = val.find('<')
    pa = val.find('(')
    co = len(val) if co == -1 else co
    lt = len(val) if lt == -1 else lt
    pa = len(val) if pa == -1 else pa
    if co < lt and co < pa:
      # Namespace or type name.
      return (val[:co], val[co+2:])
    if lt < pa:
      # Template. Make sure we capture nested templates too.
      open_tmpl = 1
      gt = lt
      while val[gt] != '>' or open_tmpl != 0:
        gt = gt + 1
        if val[gt] == '<':
          open_tmpl = open_tmpl + 1
        if val[gt] == '>':
          open_tmpl = open_tmpl - 1
      ret = val[gt+1:]
      if ret.startswith('::'):
        ret = ret[2:]
      if ret.startswith('('):
        # Template function, terminate.
        return (val, '')
      return (val[:gt+1], ret)
    # Terminate with any function name, identifier, or unmangled name.
    return (val, '')

  parts = []
  while len(name) > 0:
    (part, name) = parse_one(name)
    assert len(part) > 0
    parts.append(part)
  return parts
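
# Illustrative only: parse_cpp_name('std::vector<int>::push_back(int)', None)
# returns ['std', 'vector<int>', 'push_back(int)'], which is what lets
# treeify_syms() group symbols by namespace and class.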


def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
  dirs = {}
  for sym, type, size, path in symbols:
    if path:
      path = os.path.normpath(path)
      if strip_prefix and path.startswith(strip_prefix):
        path = path[len(strip_prefix):]
      elif path.startswith('/'):
        path = path[1:]
      path = ['[path]'] + path.split('/')

    parts = parse_cpp_name(sym, cppfilt)
    if len(parts) == 1:
      if path:
        # No namespaces, group with path.
        parts = path + parts
      else:
        new_prefix = ['[ungrouped]']
        regroups = [
            ['.L.str', '[str]'],
            ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
            ['.L__func__.', '[__func__]'],
            ['.Lswitch.table', '[switch table]'],
            ]
        for prefix, group in regroups:
          if parts[0].startswith(prefix):
            parts[0] = parts[0][len(prefix):]
            parts[0] = demangle(parts[0], cppfilt)
            new_prefix += [group]
            break
        parts = new_prefix + parts

    key = parts.pop()
    tree = dirs
    try:
      depth = 0
      for part in parts:
        depth = depth + 1
        assert part != '', path
        if part not in tree:
          tree[part] = {'$bloat_symbols':{}}
        if type not in tree[part]['$bloat_symbols']:
          tree[part]['$bloat_symbols'][type] = 0
        tree[part]['$bloat_symbols'][type] += 1
        tree = tree[part]
      old_size, old_symbols = tree.get(key, (0, {}))
      if type not in old_symbols:
        old_symbols[type] = 0
      old_symbols[type] += 1
      tree[key] = (old_size + size, old_symbols)
    except:
      print >>sys.stderr, 'sym `%s`\tparts `%s`\tkey `%s`' % (sym, parts, key)
      raise
  return dirs
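
# Illustrative sketch of the structure treeify_syms() builds: a single
# 42-byte code ('t') symbol std::vector<int>::push_back(int) with no source
# path produces
#   {'std': {'$bloat_symbols': {'t': 1},
#            'vector<int>': {'$bloat_symbols': {'t': 1},
#                            'push_back(int)': (42, {'t': 1})}}}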


def jsonify_tree(tree, name):
  children = []
  total = 0
  files = 0

  for key, val in tree.iteritems():
    if key == '$bloat_symbols':
      continue
    if isinstance(val, dict):
      subtree = jsonify_tree(val, key)
      total += subtree['data']['$area']
      children.append(subtree)
    else:
      (size, symbols) = val
      total += size
      assert len(symbols) == 1, symbols.values()[0] == 1
      symbol = symbol_type_to_human(symbols.keys()[0])
      children.append({
          'name': key + ' ' + format_bytes(size),
          'data': {
              '$area': size,
              '$symbol': symbol,
              }
          })

  children.sort(key=lambda child: -child['data']['$area'])
  dominant_symbol = ''
  if '$bloat_symbols' in tree:
    dominant_symbol = symbol_type_to_human(
        max(tree['$bloat_symbols'].iteritems(),
            key=operator.itemgetter(1))[0])
  return {
      'name': name + ' ' + format_bytes(total),
      'data': {
          '$area': total,
          '$dominant_symbol': dominant_symbol,
          },
      'children': children,
      }


def dump_nm(nmfile, strip_prefix, cppfilt):
  dirs = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)
  print ('var kTree = ' +
         json.dumps(jsonify_tree(dirs, '[everything]'), indent=2))
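
# dump_nm() prints JavaScript assigning the nested treemap to kTree; the
# names and sizes below are invented for illustration:
#   var kTree = {
#     "name": "[everything] 1.2m",
#     "data": { "$area": 1234567, "$dominant_symbol": "" },
#     "children": [ ... ]
#   }
# (each subtree's $dominant_symbol records its most common symbol type).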


def parse_objdump(input):
  """Parse objdump -h output."""
  sec_re = re.compile('^\d+ (\S+) +([0-9a-z]+)')
  sections = []
  debug_sections = []

  for line in input:
    line = line.strip()
    match = sec_re.match(line)
    if match:
      name, size = match.groups()
      if name.startswith('.'):
        name = name[1:]
      if name.startswith('debug_'):
        name = name[len('debug_'):]
        debug_sections.append((name, int(size, 16)))
      else:
        sections.append((name, int(size, 16)))
      continue
  return sections, debug_sections
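
# Illustrative only: a stripped `objdump -h` row such as
#   0 .text         0000bb44  ...
# matches sec_re above and becomes ('text', 47940) in sections, while
# .debug_* sections land in debug_sections with the 'debug_' prefix removed.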


def jsonify_sections(name, sections):
  children = []
  total = 0
  for section, size in sections:
    children.append({
        'name': section + ' ' + format_bytes(size),
        'data': { '$area': size }
        })
    total += size

  children.sort(key=lambda child: -child['data']['$area'])

  return {
      'name': name + ' ' + format_bytes(total),
      'data': { '$area': total },
      'children': children
      }


def dump_sections(objdump):
  sections, debug_sections = parse_objdump(objdump)
  sections = jsonify_sections('sections', sections)
  debug_sections = jsonify_sections('debug', debug_sections)
  size = sections['data']['$area'] + debug_sections['data']['$area']
  print 'var kTree = ' + json.dumps({
      'name': 'top ' + format_bytes(size),
      'data': { '$area': size },
      'children': [ debug_sections, sections ]})


usage="""%prog [options] MODE

Modes are:
  syms: output symbols json suitable for a treemap
  dump: print symbols sorted by size (pipe to head for best output)
  sections: output binary sections json suitable for a treemap

nm output passed to --nm-output should be from running a command
like the following (note: this can take a long time -- 30 minutes):
  nm -C -S -l /path/to/binary > nm.out

objdump output passed to --objdump-output should be from a command
like:
  objdump -h /path/to/binary > objdump.out"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdumppath',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
                  default='c++filt', help="Path to c++filt, used to demangle "
                  "symbols that weren't handled by nm. Set to an invalid path "
                  "to disable.")
opts, args = parser.parse_args()

if len(args) != 1:
  parser.print_usage()
  sys.exit(1)

mode = args[0]
if mode == 'syms':
  nmfile = open(opts.nmpath, 'r')
  try:
    res = subprocess.check_output([opts.cppfilt, 'main'])
    if res.strip() != 'main':
      print >>sys.stderr, ("%s failed demangling, "
                           "output won't be demangled." % opts.cppfilt)
      opts.cppfilt = None
  except:
    print >>sys.stderr, ("Could not find c++filt at %s, "
                         "output won't be demangled." % opts.cppfilt)
    opts.cppfilt = None
  dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
elif mode == 'sections':
  objdumpfile = open(opts.objdumppath, 'r')
  dump_sections(objdumpfile)
elif mode == 'dump':
  nmfile = open(opts.nmpath, 'r')
  syms = list(parse_nm(nmfile))
  # a list of (sym, type, size, path); sort by size.
  syms.sort(key=lambda x: -x[2])
  total = 0
  for sym, type, size, path in syms:
    if type in ('b', 'w'):
      continue  # skip bss and weak symbols
    if path is None:
      path = ''
    if opts.filter and not (opts.filter in sym or opts.filter in path):
      continue
    print '%6s %s (%s) %s' % (format_bytes(size), sym,
                              symbol_type_to_human(type), path)
    total += size
  print '%6s %s' % (format_bytes(total), 'total'),
else:
  print 'unknown mode'
  parser.print_usage()