# Copyright (c) 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

5"""Summarize the last ninja build, invoked with ninja's -C syntax.
6
7This script is designed to be automatically run after each ninja build in
8order to summarize the build's performance. Making build performance information
9more visible should make it easier to notice anomalies and opportunities. To use
Bruce Dawsone186e502018-02-12 15:41:11 -080010this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat.
11
12On Linux you can get autoninja to invoke this script using this syntax:
13
14$ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome
Bruce Dawsonffc0c7c2018-02-07 18:00:48 -080015
16You can also call this script directly using ninja's syntax to specify the
17output directory of interest:
18
19> python post_build_ninja_summary.py -C out/Default
20
Typical output looks like this:

>ninja -C out\debug_component base
ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp
ninja: Entering directory `out\debug_component'
[1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files
[1 processes, 23/23 @ 0.9/s : 26.280s ] LINK(DLL) base.dll base.dll.lib
    Longest build steps:
           0.9 weighted s to build obj/base/base_jumbo_17.obj (5.0 s CPU time)
           1.0 weighted s to build obj/base/base_jumbo_31.obj (13.1 s CPU time)
           1.2 weighted s to build obj/base/base_jumbo_4.obj (14.7 s CPU time)
           1.3 weighted s to build obj/base/base_jumbo_32.obj (15.0 s CPU time)
           1.6 weighted s to build obj/base/base_jumbo_26.obj (17.1 s CPU time)
           1.7 weighted s to build base.dll, base.dll.lib (1.7 s CPU time)
           1.7 weighted s to build obj/base/base_jumbo_11.obj (15.9 s CPU time)
           1.9 weighted s to build obj/base/base_jumbo_12.obj (18.5 s CPU time)
           3.6 weighted s to build obj/base/base_jumbo_34.obj (20.1 s CPU time)
           4.3 weighted s to build obj/base/base_jumbo_33.obj (22.3 s CPU time)
    Time by build-step type:
           0.1 s weighted time to generate 1 .c files (0.1 s CPU time)
           0.1 s weighted time to generate 1 .stamp files (0.1 s CPU time)
           0.2 s weighted time to generate 1 .h files (0.2 s CPU time)
           1.7 s weighted time to generate 1 PEFile files (1.7 s CPU time)
          24.3 s weighted time to generate 19 .obj files (233.4 s CPU time)
    26.3 s weighted time (235.5 s CPU time, 9.0x parallelism)
    23 build steps completed, average of 0.88/s

If no gn clean has been done then results will be for the last non-NULL
invocation of ninja, i.e. the last invocation that actually ran build steps.
Ideas for future statistics, and implementations, are appreciated.

The "weighted" time is the elapsed time of each build step divided by the
number of tasks that were running in parallel. This makes it an excellent
approximation of how "important" a slow step was. A link that is entirely or
mostly serialized will have a weighted time that is the same or similar to its
elapsed time. A compile that runs in parallel with 999 other compiles will
have a weighted time that is tiny.
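For example (illustrative numbers), a compile with 10 seconds of elapsed time
that overlaps ten running tasks for its entire duration accumulates only
10 / 10 = 1 second of weighted time, whereas a fully serialized 10 second
link accumulates the full 10 seconds."""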

import errno
import os
import sys


# The number of long build times to report:
long_count = 10
# The number of long times by extension to report:
long_ext_count = 5


class Target:
  """Represents a single line read from a .ninja_log file."""
  def __init__(self, start, end):
    """Creates a target object by passing in the start/end times in ms."""
    # Convert from milliseconds to seconds.
    self.start = int(start) / 1000.0
    self.end = int(end) / 1000.0
    # A list of targets, appended to by the owner of this object.
    self.targets = []
    self.weighted_duration = 0.0

  def Duration(self):
    """Returns the task duration in seconds as a float."""
    return self.end - self.start

  def SetWeightedDuration(self, weighted_duration):
    """Sets the weighted duration, in seconds, passed in as a float."""
    self.weighted_duration = weighted_duration

  def WeightedDuration(self):
    """Returns the task's weighted duration in seconds as a float.

    The weighted duration takes the elapsed time of the task and divides it
    by how many other tasks were running at the same time. Thus, it
    represents the approximate impact of this task on the total build time,
    with serialized or serializing steps typically ending up with much
    longer weighted durations.
    weighted_duration should always be the same or shorter than duration.
    """
    # Allow for modest floating-point errors.
    epsilon = 0.000002
    if self.weighted_duration > self.Duration() + epsilon:
      print '%s > %s?' % (self.weighted_duration, self.Duration())
    assert(self.weighted_duration <= self.Duration() + epsilon)
    return self.weighted_duration

  def DescribeTargets(self):
    """Returns a printable string that summarizes the targets."""
    if len(self.targets) == 1:
      return self.targets[0]
    # Some build steps generate dozens of outputs - handle them sanely.
    # It's a bit odd that with three targets we return all three, but with
    # more than three we return only two; this works well in practice.
    elif len(self.targets) > 3:
      return '(%d items) ' % len(self.targets) + (
          ', '.join(self.targets[:2]) + ', ...')
    else:
      return ', '.join(self.targets)


# Copied with some modifications from ninjatracing.
def ReadTargets(log, show_all):
  """Reads all targets from the .ninja_log file |log|.

  The result is a list of Target objects."""
  header = log.readline()
  assert header == '# ninja log v5\n', \
         'unrecognized ninja log version %r' % header
  targets = {}
  last_end_seen = 0
  for line in log:
    parts = line.strip().split('\t')
    if len(parts) != 5:
      # If ninja.exe is rudely halted then the .ninja_log file may be
      # corrupt. Silently continue.
      continue
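    # A v5 log line has five tab-separated fields:
    #   start_time_ms, end_time_ms, restat_mtime, output_path, command_hash
    # Only the start/end times, the output name, and the command hash are
    # used below.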
    start, end, _, name, cmdhash = parts  # Ignore restat.
    if not show_all and int(end) < last_end_seen:
      # An earlier time stamp means that this step is the first in a new
      # build, possibly an incremental build. Throw away the previous
      # data so that this new build will be displayed independently.
      targets = {}
    last_end_seen = int(end)
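    # All outputs produced by one command share a single command hash, so
    # they are merged into one Target that lists every output name.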
    targets.setdefault(cmdhash, Target(start, end)).targets.append(name)
  return targets.values()


def GetExtension(target):
  """Return the file extension that best represents a target.

  For targets that generate multiple outputs it is important to return a
  consistent 'canonical' extension. Ultimately the goal is to group build
  steps by type."""
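  # For example (illustrative paths), gen/foo.mojom.cc and gen/foo.mojom.h are
  # both grouped under 'mojo', and base.dll together with base.dll.lib is
  # grouped under 'PEFile (linking)'.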
  for output in target.targets:
    # Normalize all mojo related outputs to 'mojo'.
    if output.count('.mojom') > 0:
      extension = 'mojo'
      break
    # Not a true extension, but a good grouping.
    if output.endswith('type_mappings'):
      extension = 'type_mappings'
      break
    extension = os.path.splitext(output)[1]
    if len(extension) == 0:
      extension = '(no extension found)'
    if extension in ['.pdb', '.dll', '.exe']:
      extension = 'PEFile (linking)'
      # Make sure that .dll and .exe are grouped together and that the
      # .dll.lib files don't cause these to be listed as libraries.
      break
    if extension in ['.so', '.TOC']:
      extension = '.so (linking)'
      # Attempt to identify linking steps and avoid categorizing them as
      # '.TOC'.
      break
  return extension


def SummarizeEntries(entries):
  """Print a summary of the passed in list of Target objects."""

  # Create a list that is in order by time stamp and has entries for the
  # beginning and ending of each build step (one time stamp may have multiple
  # entries due to multiple steps starting/stopping at exactly the same time).
  # Iterate through this list, keeping track of which tasks are running at all
  # times. At each time step calculate a running total for weighted time so
  # that when each task ends its own weighted time can easily be calculated.
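  # For example (illustrative numbers), if three tasks overlap for six
  # seconds, that interval adds 6 / 3 = 2 seconds to the running weighted-time
  # total, and each of the three tasks eventually picks up those two seconds
  # as part of its own weighted duration.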
  task_start_stop_times = []

  earliest = -1
  latest = 0
  total_cpu_time = 0
  for target in entries:
    if earliest < 0 or target.start < earliest:
      earliest = target.start
    if target.end > latest:
      latest = target.end
    total_cpu_time += target.Duration()
    task_start_stop_times.append((target.start, 'start', target))
    task_start_stop_times.append((target.end, 'stop', target))
  length = latest - earliest
  weighted_total = 0.0

  task_start_stop_times.sort()
  # Now we have all task start/stop times sorted by when they happen. If a
  # task starts and stops on the same time stamp then the start will come
  # first because 'start' sorts before 'stop', which is important for making
  # this work correctly.
  # Track the tasks which are currently running.
  running_tasks = {}
  # Record the time we have processed up to so we know how to calculate time
  # deltas.
  last_time = task_start_stop_times[0][0]
  # Track the accumulated weighted time so that it can efficiently be added
  # to individual tasks.
  last_weighted_time = 0.0
  # Scan all start/stop events.
  for event in task_start_stop_times:
    time, action_name, target = event
    # Accumulate weighted time up to now.
    num_running = len(running_tasks)
    if num_running > 0:
      # Update the total weighted time up to this moment.
      last_weighted_time += (time - last_time) / float(num_running)
    if action_name == 'start':
      # Record the total weighted task time when this task starts.
      running_tasks[target] = last_weighted_time
    if action_name == 'stop':
      # Record the change in the total weighted task time while this task ran.
      weighted_duration = last_weighted_time - running_tasks[target]
      target.SetWeightedDuration(weighted_duration)
      weighted_total += weighted_duration
      del running_tasks[target]
    last_time = time
  assert(len(running_tasks) == 0)

  # Warn if the sum of weighted times is off by more than half a second.
  if abs(length - weighted_total) > 0.5:
    print 'Discrepancy!!! Length = %.3f, weighted total = %.3f' % (
        length, weighted_total)

  # Print the slowest build steps (by weighted time).
  print '    Longest build steps:'
  entries.sort(key=lambda x: x.WeightedDuration())
  for target in entries[-long_count:]:
    print '      %8.1f weighted s to build %s (%.1f s CPU time)' % (
        target.WeightedDuration(),
        target.DescribeTargets(), target.Duration())

  # Sum up the time by file extension/type of the output file.
  count_by_ext = {}
  time_by_ext = {}
  weighted_time_by_ext = {}
  # Scan through all of the targets to build up per-extension statistics.
  for target in entries:
    extension = GetExtension(target)
    time_by_ext[extension] = time_by_ext.get(extension, 0) + target.Duration()
    weighted_time_by_ext[extension] = weighted_time_by_ext.get(extension,
        0) + target.WeightedDuration()
    count_by_ext[extension] = count_by_ext.get(extension, 0) + 1

  print '    Time by build-step type:'
  # Copy to a list with extension name and total time swapped, to (time, ext).
  weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in
                                       weighted_time_by_ext.items())
  # Print the slowest build target types (by weighted time):
  for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
    print ('      %8.1f s weighted time to generate %d %s files '
           '(%1.1f s CPU time)') % (time, count_by_ext[extension],
                                    extension, time_by_ext[extension])

  print '    %.1f s weighted time (%.1f s CPU time, %1.1fx parallelism)' % (
      length, total_cpu_time,
      total_cpu_time * 1.0 / length)
  print '    %d build steps completed, average of %1.2f/s' % (
      len(entries), len(entries) / length)


def main(argv):
  log_file = '.ninja_log'
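  # Ninja's '-C dir' syntax passes the build directory as the argument that
  # follows '-C', so look for '-C' and prepend that directory to the log path.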
  for index, arg in enumerate(argv):
    if arg == '-C':
      log_file = os.path.join(argv[index + 1], log_file)

  try:
    with open(log_file, 'r') as log:
      entries = ReadTargets(log, False)
      SummarizeEntries(entries)
  except IOError:
    print 'No log file found, no build summary created.'
    return errno.ENOENT


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))