# Copyright (c) 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Summarize the last ninja build, invoked with ninja's -C syntax.

This script is designed to be automatically run after each ninja build in
order to summarize the build's performance. Making build performance information
more visible should make it easier to notice anomalies and opportunities. To use
this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat.

On Linux you can get autoninja to invoke this script using this syntax:

    $ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome

You can also call this script directly using ninja's syntax to specify the
output directory of interest:

    > python post_build_ninja_summary.py -C out/Default

Typical output looks like this:

    >ninja -C out\debug_component base
    ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp
    ninja: Entering directory `out\debug_component'
    [1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files
    [1 processes, 23/23 @ 0.9/s : 26.280s ] LINK(DLL) base.dll base.dll.lib
        Longest build steps:
               0.9 weighted s to build obj/base/base_jumbo_17.obj (5.0 s CPU time)
               1.0 weighted s to build obj/base/base_jumbo_31.obj (13.1 s CPU time)
               1.2 weighted s to build obj/base/base_jumbo_4.obj (14.7 s CPU time)
               1.3 weighted s to build obj/base/base_jumbo_32.obj (15.0 s CPU time)
               1.6 weighted s to build obj/base/base_jumbo_26.obj (17.1 s CPU time)
               1.7 weighted s to build base.dll, base.dll.lib (1.7 s CPU time)
               1.7 weighted s to build obj/base/base_jumbo_11.obj (15.9 s CPU time)
               1.9 weighted s to build obj/base/base_jumbo_12.obj (18.5 s CPU time)
               3.6 weighted s to build obj/base/base_jumbo_34.obj (20.1 s CPU time)
               4.3 weighted s to build obj/base/base_jumbo_33.obj (22.3 s CPU time)
        Time by build-step type:
               0.1 s weighted time to generate 1 .c files (0.1 s CPU time)
               0.1 s weighted time to generate 1 .stamp files (0.1 s CPU time)
               0.2 s weighted time to generate 1 .h files (0.2 s CPU time)
               1.7 s weighted time to generate 1 PEFile files (1.7 s CPU time)
              24.3 s weighted time to generate 19 .obj files (233.4 s CPU time)
        26.3 s weighted time (235.5 s CPU time, 9.0x parallelism)
        23 build steps completed, average of 0.88/s

If no gn clean has been done then results will be for the last non-NULL
invocation of ninja. Ideas for future statistics, and implementations of
them, are appreciated.

The "weighted" time is the elapsed time of each build step divided by the number
of tasks that were running in parallel. This makes it an excellent approximation
of how "important" a slow step was. A link that is entirely or mostly serialized
will have a weighted time that is the same or similar to its elapsed time. A
compile that runs in parallel with 999 other compiles will have a weighted time
that is tiny."""
| 58 | |
Daniel Bratell | a10370c | 2018-06-11 07:58:59 +0000 | [diff] [blame] | 59 | import argparse |
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 60 | import errno |
| 61 | import os |
| 62 | import sys |
| 63 | |
| 64 | |
# The number of long build times to report (the N slowest steps by
# weighted duration):
long_count = 10
# The number of long times by extension (build-step type) to report:
long_ext_count = 5
| 69 | |
| 70 | |
class Target:
    """Represents a single line read from a .ninja_log file."""

    def __init__(self, start, end):
        """Creates a target object by passing in the start/end times in
        seconds as a float."""
        self.start = start
        self.end = end
        # A list of targets, appended to by the owner of this object.
        self.targets = []
        self.weighted_duration = 0.0

    def Duration(self):
        """Returns the task duration in seconds as a float."""
        return self.end - self.start

    def SetWeightedDuration(self, weighted_duration):
        """Sets the weighted duration, in seconds, passed in as a float."""
        self.weighted_duration = weighted_duration

    def WeightedDuration(self):
        """Returns the task's weighted duration in seconds as a float.

        Weighted_duration takes the elapsed time of the task and divides it
        by how many other tasks were running at the same time. Thus, it
        represents the approximate impact of this task on the total build
        time, with serialized or serializing steps typically ending up with
        much longer weighted durations.
        weighted_duration should always be the same or shorter than duration.
        """
        # Allow for modest floating-point errors.
        epsilon = 0.000002
        if self.weighted_duration > self.Duration() + epsilon:
            # Parenthesized print works under both Python 2 and Python 3
            # (the original `print '...'` statement is a SyntaxError in 3).
            print('%s > %s?' % (self.weighted_duration, self.Duration()))
        assert self.weighted_duration <= self.Duration() + epsilon
        return self.weighted_duration

    def DescribeTargets(self):
        """Returns a printable string that summarizes the targets."""
        if len(self.targets) == 1:
            return self.targets[0]
        # Some build steps generate dozens of outputs - handle them sanely.
        # It's a bit odd that if there are three targets we return all three
        # but if there are more than three we just return two, but this works
        # well in practice.
        elif len(self.targets) > 3:
            return '(%d items) ' % len(self.targets) + (
                ', '.join(self.targets[:2]) + ', ...')
        else:
            return ', '.join(self.targets)
| 120 | |
| 121 | |
# Copied with some modifications from ninjatracing
def ReadTargets(log, show_all):
    """Reads all targets from a .ninja_log file object |log|.

    Returns a list of Target objects, one per unique build step, in no
    particular order (callers sort as needed). Only the last build's steps
    are kept unless |show_all| is true."""
    header = log.readline()
    assert header == '# ninja log v5\n', \
        'unrecognized ninja log version %r' % header
    targets_dict = {}
    last_end_seen = 0.0
    for line in log:
        parts = line.strip().split('\t')
        if len(parts) != 5:
            # If ninja.exe is rudely halted then the .ninja_log file may be
            # corrupt. Silently continue.
            continue
        start, end, _, name, cmdhash = parts  # Ignore restat.
        # Convert from integral milliseconds to float seconds.
        start = int(start) / 1000.0
        end = int(end) / 1000.0
        if not show_all and end < last_end_seen:
            # An earlier time stamp means that this step is the first in a new
            # build, possibly an incremental build. Throw away the previous
            # data so that this new build will be displayed independently.
            # This has to be done by comparing end times because records are
            # written to the .ninja_log file when commands complete, so end
            # times are guaranteed to be in order, but start times are not.
            targets_dict = {}
        target = None
        if cmdhash in targets_dict:
            target = targets_dict[cmdhash]
            if not show_all and (target.start != start or target.end != end):
                # If several builds in a row just run one or two build steps
                # then the end times may not go backwards so the last build
                # may not be detected as such. However in many cases there
                # will be a build step repeated in the two builds and the
                # changed start/stop points for that command, identified by
                # the hash, can be used to detect and reset the target
                # dictionary.
                targets_dict = {}
                target = None
        if not target:
            targets_dict[cmdhash] = target = Target(start, end)
        last_end_seen = end
        target.targets.append(name)
    # Wrap in list() so the result is sortable/indexable under Python 3,
    # where dict.values() is a view; this is a no-op under Python 2.
    return list(targets_dict.values())
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 167 | |
| 168 | |
def GetExtension(target):
    """Return the file extension that best represents a target.

    For targets that generate multiple outputs it is important to return a
    consistent 'canonical' extension. Ultimately the goal is to group build
    steps by type."""
    # Default covers a target with no outputs at all; the original code left
    # `extension` unbound in that case and raised NameError.
    extension = '(no extension found)'
    for output in target.targets:
        # Normalize all mojo related outputs to 'mojo'.
        if '.mojom' in output:
            extension = 'mojo'
            break
        # Not a true extension, but a good grouping.
        if output.endswith('type_mappings'):
            extension = 'type_mappings'
            break
        extension = os.path.splitext(output)[1]
        if len(extension) == 0:
            extension = '(no extension found)'
        if extension in ['.pdb', '.dll', '.exe']:
            # Make sure that .dll and .exe are grouped together and that the
            # .dll.lib files don't cause these to be listed as libraries.
            extension = 'PEFile (linking)'
            break
        if extension in ['.so', '.TOC']:
            # Attempt to identify linking, avoid identifying as '.TOC'.
            extension = '.so (linking)'
            break
    return extension
| 197 | |
| 198 | |
def SummarizeEntries(entries):
    """Print a summary of the passed in list of Target objects."""

    # Create a list that is in order by time stamp and has entries for the
    # beginning and ending of each build step (one time stamp may have
    # multiple entries due to multiple steps starting/stopping at exactly the
    # same time). Iterate through this list, keeping track of which tasks are
    # running at all times. At each time step calculate a running total for
    # weighted time so that when each task ends its own weighted time can
    # easily be calculated.
    task_start_stop_times = []

    earliest = -1
    latest = 0
    total_cpu_time = 0
    for target in entries:
        if earliest < 0 or target.start < earliest:
            earliest = target.start
        if target.end > latest:
            latest = target.end
        total_cpu_time += target.Duration()
        task_start_stop_times.append((target.start, 'start', target))
        task_start_stop_times.append((target.end, 'stop', target))
    length = latest - earliest
    weighted_total = 0.0

    task_start_stop_times.sort()
    # Now we have all task start/stop times sorted by when they happen. If a
    # task starts and stops on the same time stamp then the start will come
    # first because of the alphabet, which is important for making this work
    # correctly.
    # Track the tasks which are currently running.
    running_tasks = {}
    # Record the time we have processed up to so we know how to calculate
    # time deltas.
    last_time = task_start_stop_times[0][0]
    # Track the accumulated weighted time so that it can efficiently be
    # added to individual tasks.
    last_weighted_time = 0.0
    # Scan all start/stop events.
    for event in task_start_stop_times:
        time, action_name, target = event
        # Accumulate weighted time up to now.
        num_running = len(running_tasks)
        if num_running > 0:
            # Update the total weighted time up to this moment.
            last_weighted_time += (time - last_time) / float(num_running)
        if action_name == 'start':
            # Record the total weighted task time when this task starts.
            running_tasks[target] = last_weighted_time
        if action_name == 'stop':
            # Record the change in the total weighted task time while this
            # task ran.
            weighted_duration = last_weighted_time - running_tasks[target]
            target.SetWeightedDuration(weighted_duration)
            weighted_total += weighted_duration
            del running_tasks[target]
        last_time = time
    assert len(running_tasks) == 0

    # Warn if the sum of weighted times is off by more than half a second.
    # All times are float seconds by this point (ReadTargets converts from
    # milliseconds), so the threshold is 0.5 - the previous value of 500
    # was a leftover from when times were in milliseconds and made this
    # check effectively dead.
    if abs(length - weighted_total) > 0.5:
        print('Discrepancy!!! Length = %.3f, weighted total = %.3f' % (
            length, weighted_total))

    # Print the slowest build steps (by weighted time).
    print('    Longest build steps:')
    entries.sort(key=lambda x: x.WeightedDuration())
    for target in entries[-long_count:]:
        print('      %8.1f weighted s to build %s (%.1f s CPU time)' % (
            target.WeightedDuration(),
            target.DescribeTargets(), target.Duration()))

    # Sum up the time by file extension/type of the output file.
    count_by_ext = {}
    time_by_ext = {}
    weighted_time_by_ext = {}
    # Scan through all of the targets to build up per-extension statistics.
    for target in entries:
        extension = GetExtension(target)
        time_by_ext[extension] = time_by_ext.get(extension,
                                                 0) + target.Duration()
        weighted_time_by_ext[extension] = weighted_time_by_ext.get(
            extension, 0) + target.WeightedDuration()
        count_by_ext[extension] = count_by_ext.get(extension, 0) + 1

    print('    Time by build-step type:')
    # Copy to a list with extension name and total time swapped, to (time,
    # ext).
    weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in
                                         weighted_time_by_ext.items())
    # Print the slowest build target types (by weighted time):
    for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
        print(('      %8.1f s weighted time to generate %d %s files '
               '(%1.1f s CPU time)') % (time, count_by_ext[extension],
                                        extension, time_by_ext[extension]))

    print('    %.1f s weighted time (%.1f s CPU time, %1.1fx parallelism)' % (
        length, total_cpu_time,
        total_cpu_time * 1.0 / length))
    print('    %d build steps completed, average of %1.2f/s' % (
        len(entries), len(entries) / (length)))
| 297 | |
| 298 | |
def main():
    """Parse arguments, locate the .ninja_log file and summarize it.

    Returns 0 on success, or errno.ENOENT if the log file can't be read."""
    log_file = '.ninja_log'
    parser = argparse.ArgumentParser()
    parser.add_argument('-C', dest='build_directory',
                        help='Build directory.')
    parser.add_argument('--log-file',
                        help="specific ninja log file to analyze.")
    # Unrecognized arguments (e.g. ninja targets passed through by
    # autoninja) are deliberately ignored.
    args, _extra_args = parser.parse_known_args()
    if args.build_directory:
        log_file = os.path.join(args.build_directory, log_file)
    if args.log_file:
        log_file = args.log_file

    try:
        with open(log_file, 'r') as log:
            entries = ReadTargets(log, False)
            SummarizeEntries(entries)
    except IOError:
        # Parenthesized print works under both Python 2 and Python 3.
        print('Log file %r not found, no build summary created.' % log_file)
        return errno.ENOENT
    # Explicit success code (sys.exit(None) was also 0, but this is clearer).
    return 0
| 319 | |
| 320 | |
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit code.
    sys.exit(main())