#!/usr/bin/python
#
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

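"""Summarize the size contributions of the symbols and sections in a binary.

Reads saved `nm -C -S -l` or `objdump -h` output and prints either a symbol
listing sorted by size or a 'var kTree = ...' JSON blob suitable for
rendering as a treemap (see the usage text at the bottom of this file).
"""
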
import fileinput
import operator
import optparse
import os
import pprint
import re
import subprocess
import sys
import json

def format_bytes(bytes):
    """Pretty-print a number of bytes."""
    if bytes > 1e6:
        bytes = bytes / 1.0e6
        return '%.1fm' % bytes
    if bytes > 1e3:
        bytes = bytes / 1.0e3
        return '%.1fk' % bytes
    return str(bytes)


def symbol_type_to_human(type):
    """Convert a symbol type as printed by nm into a human-readable name."""
    return {
        'b': 'bss',
        'd': 'data',
        'r': 'read-only data',
        't': 'code',
        'u': 'weak symbol',  # Unique global.
        'w': 'weak symbol',
        'v': 'weak symbol'
    }[type]


def parse_nm(input):
    """Parse nm output.

    Argument: an iterable over lines of nm output.

    Yields: (symbol name, symbol type, symbol size, source file path).
    Path may be None if nm couldn't figure out the source file.
    """
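    # For reference, an input line from `nm -C -S -l` looks roughly like
    # (hypothetical values):
    #   004003c0 0000002a T main<tab>/src/tool/main.cc:12
    # i.e. address, size, type letter, symbol name, and an optional
    # tab-separated "file:line" suffix.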

    # Match lines with size + symbol + optional filename.
    sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')

    # Match lines with addr but no size.
    addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
    # Match lines that don't have an address at all -- typically external
    # symbols.
    noaddr_re = re.compile(r'^ + (.) (.*)$')

    for line in input:
        line = line.rstrip()
        match = sym_re.match(line)
        if match:
            size, type, sym = match.groups()[0:3]
            size = int(size, 16)
            type = type.lower()
            if type in ['u', 'v']:
                type = 'w'  # just call them all weak
            if type == 'b':
                continue  # skip all BSS for now
            path = match.group(4)
            yield sym, type, size, path
            continue
        match = addr_re.match(line)
        if match:
            type, sym = match.groups()[0:2]
            # No size == we don't care.
            continue
        match = noaddr_re.match(line)
        if match:
            type, sym = match.groups()
            if type in ('U', 'w'):
                # external or weak symbol
                continue

        print >>sys.stderr, 'unparsed:', repr(line)

def demangle(ident, cppfilt):
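    """Return ident demangled via c++filt, when cppfilt is available."""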
    if cppfilt and ident.startswith('_Z'):
        # Demangle names when possible. Mangled names all start with _Z.
        ident = subprocess.check_output([cppfilt, ident]).strip()
    return ident


class Suffix:
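    """One symbol-name suffix pattern (e.g. '.isra.0') plus the tag used to
    label it once the suffix is stripped."""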
    def __init__(self, suffix, replacement):
        self.pattern = '^(.*)' + suffix + '(.*)$'
        self.re = re.compile(self.pattern)
        self.replacement = replacement


class SuffixCleanup:
    """Pre-compile suffix regular expressions."""
    def __init__(self):
        self.suffixes = [
            Suffix(r'\.part\.([0-9]+)', 'part'),
            Suffix(r'\.constprop\.([0-9]+)', 'constprop'),
            Suffix(r'\.isra\.([0-9]+)', 'isra'),
        ]
    def cleanup(self, ident, cppfilt):
        """Cleanup identifiers that have suffixes preventing demangling,
        and demangle if possible."""
        to_append = []
        for s in self.suffixes:
            found = s.re.match(ident)
            if not found:
                continue
            to_append += [' [' + s.replacement + '.' + found.group(2) + ']']
            ident = found.group(1) + found.group(3)
        if len(to_append) > 0:
            # Only try to demangle if there were suffixes.
            ident = demangle(ident, cppfilt)
        for s in to_append:
            ident += s
        return ident

suffix_cleanup = SuffixCleanup()

def parse_cpp_name(name, cppfilt):
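    """Split a C++ symbol name into its nested parts, demangling if needed.

    For example (hypothetical symbol), 'foo::Bar<int>::baz(int)' is split
    into ['foo', 'Bar<int>', 'baz(int)'], so symbols can be grouped by
    namespace and class in the tree.
    """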
    name = suffix_cleanup.cleanup(name, cppfilt)

    # Turn prefixes into suffixes so namespacing works.
    prefixes = [
        ['bool ', ''],
        ['construction vtable for ', ' [construction vtable]'],
        ['global constructors keyed to ', ' [global constructors]'],
        ['guard variable for ', ' [guard variable]'],
        ['int ', ''],
        ['non-virtual thunk to ', ' [non-virtual thunk]'],
        ['typeinfo for ', ' [typeinfo]'],
        ['typeinfo name for ', ' [typeinfo name]'],
        ['virtual thunk to ', ' [virtual thunk]'],
        ['void ', ''],
        ['vtable for ', ' [vtable]'],
        ['VTT for ', ' [VTT]'],
    ]
    for prefix, replacement in prefixes:
        if name.startswith(prefix):
            name = name[len(prefix):] + replacement
    # Simplify parenthesis parsing.
    replacements = [
        ['(anonymous namespace)', '[anonymous namespace]'],
    ]
    for value, replacement in replacements:
        name = name.replace(value, replacement)

    def parse_one(val):
        """Returns (leftmost-part, remaining)."""
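        # For instance (hypothetical input), 'ns::Klass<int>::method(int)'
        # yields ('ns', 'Klass<int>::method(int)') on the first call.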
        if (val.startswith('operator') and
            not (val[8].isalnum() or val[8] == '_')):
            # Operator overload function, terminate.
            return (val, '')
        co = val.find('::')
        lt = val.find('<')
        pa = val.find('(')
        co = len(val) if co == -1 else co
        lt = len(val) if lt == -1 else lt
        pa = len(val) if pa == -1 else pa
        if co < lt and co < pa:
            # Namespace or type name.
            return (val[:co], val[co+2:])
        if lt < pa:
            # Template. Make sure we capture nested templates too.
            open_tmpl = 1
            gt = lt
            while val[gt] != '>' or open_tmpl != 0:
                gt = gt + 1
                if val[gt] == '<':
                    open_tmpl = open_tmpl + 1
                if val[gt] == '>':
                    open_tmpl = open_tmpl - 1
            ret = val[gt+1:]
            if ret.startswith('::'):
                ret = ret[2:]
            if ret.startswith('('):
                # Template function, terminate.
                return (val, '')
            return (val[:gt+1], ret)
        # Terminate with any function name, identifier, or unmangled name.
        return (val, '')

    parts = []
    while len(name) > 0:
        (part, name) = parse_one(name)
        assert len(part) > 0
        parts.append(part)
    return parts


def treeify_syms(symbols, strip_prefix=None, cppfilt=None):
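    """Convert symbols into a nested dict keyed by namespace/class/path parts.

    Interior nodes map each part to a subtree and keep a '$bloat_symbols'
    dict counting the symbol types below that node; leaves map the final
    part to a (total size, {symbol type: count}) tuple.
    """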
    dirs = {}
    for sym, type, size, path in symbols:
        if path:
            path = os.path.normpath(path)
            if strip_prefix and path.startswith(strip_prefix):
                path = path[len(strip_prefix):]
            elif path.startswith('/'):
                path = path[1:]
            path = ['[path]'] + path.split('/')

        parts = parse_cpp_name(sym, cppfilt)
        if len(parts) == 1:
            if path:
                # No namespaces, group with path.
                parts = path + parts
            else:
                new_prefix = ['[ungrouped]']
                regroups = [
                    ['.L.str', '[str]'],
                    ['.L__PRETTY_FUNCTION__.', '[__PRETTY_FUNCTION__]'],
                    ['.L__func__.', '[__func__]'],
                    ['.Lswitch.table', '[switch table]'],
                ]
                for prefix, group in regroups:
                    if parts[0].startswith(prefix):
                        parts[0] = parts[0][len(prefix):]
                        parts[0] = demangle(parts[0], cppfilt)
                        new_prefix += [group]
                        break
                parts = new_prefix + parts

        key = parts.pop()
        tree = dirs
        try:
            depth = 0
            for part in parts:
                depth = depth + 1
                assert part != '', path
                if part not in tree:
                    tree[part] = {'$bloat_symbols': {}}
                if type not in tree[part]['$bloat_symbols']:
                    tree[part]['$bloat_symbols'][type] = 0
                tree[part]['$bloat_symbols'][type] += 1
                tree = tree[part]
            old_size, old_symbols = tree.get(key, (0, {}))
            if type not in old_symbols:
                old_symbols[type] = 0
            old_symbols[type] += 1
            tree[key] = (old_size + size, old_symbols)
        except:
            print >>sys.stderr, 'sym `%s`\tparts `%s`\tkey `%s`' % (sym, parts, key)
            raise
    return dirs


def jsonify_tree(tree, name):
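    """Convert a tree from treeify_syms() into the name/data/children dict
    structure used for the treemap JSON, with children sorted by size."""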
    children = []
    total = 0
    files = 0

    for key, val in tree.iteritems():
        if key == '$bloat_symbols':
            continue
        if isinstance(val, dict):
            subtree = jsonify_tree(val, key)
            total += subtree['data']['$area']
            children.append(subtree)
        else:
            (size, symbols) = val
            total += size
            assert len(symbols) == 1, symbols
            symbol = symbol_type_to_human(symbols.keys()[0])
            children.append({
                'name': key + ' ' + format_bytes(size),
                'data': {
                    '$area': size,
                    '$symbol': symbol,
                }
            })

    children.sort(key=lambda child: -child['data']['$area'])
    dominant_symbol = ''
    if '$bloat_symbols' in tree:
        dominant_symbol = symbol_type_to_human(
            max(tree['$bloat_symbols'].iteritems(),
                key=operator.itemgetter(1))[0])
    return {
        'name': name + ' ' + format_bytes(total),
        'data': {
            '$area': total,
            '$dominant_symbol': dominant_symbol,
        },
        'children': children,
    }


def dump_nm(nmfile, strip_prefix, cppfilt):
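    """Print a treemap-ready 'var kTree = ...' blob built from nm output."""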
    dirs = treeify_syms(parse_nm(nmfile), strip_prefix, cppfilt)
    print ('var kTree = ' +
           json.dumps(jsonify_tree(dirs, '[everything]'), indent=2))


def parse_objdump(input):
    """Parse objdump -h output."""
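    # After strip(), a section row looks roughly like (hypothetical values):
    #   0 .text         0001b2c4  00000000004003c0  ...
    # i.e. index, section name, then the size in hex.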
    sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)')
    sections = []
    debug_sections = []

    for line in input:
        line = line.strip()
        match = sec_re.match(line)
        if match:
            name, size = match.groups()
            if name.startswith('.'):
                name = name[1:]
            if name.startswith('debug_'):
                name = name[len('debug_'):]
                debug_sections.append((name, int(size, 16)))
            else:
                sections.append((name, int(size, 16)))
            continue
    return sections, debug_sections


def jsonify_sections(name, sections):
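    """Convert a list of (section name, size) pairs into a treemap node."""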
    children = []
    total = 0
    for section, size in sections:
        children.append({
            'name': section + ' ' + format_bytes(size),
            'data': { '$area': size }
        })
        total += size

    children.sort(key=lambda child: -child['data']['$area'])

    return {
        'name': name + ' ' + format_bytes(total),
        'data': { '$area': total },
        'children': children
    }


def dump_sections(objdump):
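    """Print a treemap-ready 'var kTree = ...' blob of section sizes."""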
    sections, debug_sections = parse_objdump(objdump)
    sections = jsonify_sections('sections', sections)
    debug_sections = jsonify_sections('debug', debug_sections)
    size = sections['data']['$area'] + debug_sections['data']['$area']
    print 'var kTree = ' + json.dumps({
        'name': 'top ' + format_bytes(size),
        'data': { '$area': size },
        'children': [ debug_sections, sections ]})


usage = """%prog [options] MODE

Modes are:
  syms: output symbols JSON suitable for a treemap
  dump: print symbols sorted by size (pipe to head for best output)
  sections: output binary sections JSON suitable for a treemap

nm output passed to --nm-output should be from running a command
like the following (note: this can take a long time -- 30 minutes):
  nm -C -S -l /path/to/binary > nm.out

objdump output passed to --objdump-output should be from a command
like:
  objdump -h /path/to/binary > objdump.out"""
parser = optparse.OptionParser(usage=usage)
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdumppath',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
parser.add_option('--c++filt', action='store', metavar='PATH', dest='cppfilt',
                  default='c++filt', help="Path to c++filt, used to demangle "
                  "symbols that weren't handled by nm. Set to an invalid path "
                  "to disable.")
opts, args = parser.parse_args()

if len(args) != 1:
    parser.print_usage()
    sys.exit(1)

mode = args[0]
if mode == 'syms':
    nmfile = open(opts.nmpath, 'r')
    try:
        res = subprocess.check_output([opts.cppfilt, 'main'])
        if res.strip() != 'main':
            print >>sys.stderr, ("%s failed demangling, "
                                 "output won't be demangled." % opts.cppfilt)
            opts.cppfilt = None
    except:
        print >>sys.stderr, ("Could not find c++filt at %s, "
                             "output won't be demangled." % opts.cppfilt)
        opts.cppfilt = None
    dump_nm(nmfile, strip_prefix=opts.strip_prefix, cppfilt=opts.cppfilt)
elif mode == 'sections':
    objdumpfile = open(opts.objdumppath, 'r')
    dump_sections(objdumpfile)
elif mode == 'dump':
    nmfile = open(opts.nmpath, 'r')
    syms = list(parse_nm(nmfile))
    # a list of (sym, type, size, path); sort by size.
    syms.sort(key=lambda x: -x[2])
    total = 0
    for sym, type, size, path in syms:
        if type in ('b', 'w'):
            continue  # skip bss and weak symbols
        if path is None:
            path = ''
        if opts.filter and not (opts.filter in sym or opts.filter in path):
            continue
        print '%6s %s (%s) %s' % (format_bytes(size), sym,
                                  symbol_type_to_human(type), path)
        total += size
    print '%6s %s' % (format_bytes(total), 'total'),
else:
    print 'unknown mode'
    parser.print_usage()