# Copyright (c) 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Summarize the last ninja build, invoked with ninja's -C syntax.

This script is designed to be automatically run after each ninja build in
order to summarize the build's performance. Making build performance information
more visible should make it easier to notice anomalies and opportunities. To use
this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat.

On Linux you can get autoninja to invoke this script using this syntax:

    $ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome

You can also call this script directly using ninja's syntax to specify the
output directory of interest:

    > python post_build_ninja_summary.py -C out/Default

Typical output looks like this:

    >ninja -C out\debug_component base
    ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp
    ninja: Entering directory `out\debug_component'
    [1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files
    [1 processes, 23/23 @ 0.9/s : 26.280s ] LINK(DLL) base.dll base.dll.lib
        Longest build steps:
               0.9 weighted s to build obj/base/base_jumbo_17.obj (5.0 s CPU time)
               1.0 weighted s to build obj/base/base_jumbo_31.obj (13.1 s CPU time)
               1.2 weighted s to build obj/base/base_jumbo_4.obj (14.7 s CPU time)
               1.3 weighted s to build obj/base/base_jumbo_32.obj (15.0 s CPU time)
               1.6 weighted s to build obj/base/base_jumbo_26.obj (17.1 s CPU time)
               1.7 weighted s to build base.dll, base.dll.lib (1.7 s CPU time)
               1.7 weighted s to build obj/base/base_jumbo_11.obj (15.9 s CPU time)
               1.9 weighted s to build obj/base/base_jumbo_12.obj (18.5 s CPU time)
               3.6 weighted s to build obj/base/base_jumbo_34.obj (20.1 s CPU time)
               4.3 weighted s to build obj/base/base_jumbo_33.obj (22.3 s CPU time)
        Time by build-step type:
               0.1 s weighted time to generate 1 .c files (0.1 s CPU time)
               0.1 s weighted time to generate 1 .stamp files (0.1 s CPU time)
               0.2 s weighted time to generate 1 .h files (0.2 s CPU time)
               1.7 s weighted time to generate 1 PEFile files (1.7 s CPU time)
              24.3 s weighted time to generate 19 .obj files (233.4 s CPU time)
        26.3 s weighted time (235.5 s CPU time, 9.0x parallelism)
        23 build steps completed, average of 0.88/s

If no gn clean has been done then results will be for the last non-NULL
invocation of ninja. Ideas for future statistics, and implementations of
them, are appreciated.

The "weighted" time is the elapsed time of each build step divided by the number
of tasks that were running in parallel. This makes it an excellent approximation
of how "important" a slow step was. A link that is entirely or mostly serialized
will have a weighted time that is the same or similar to its elapsed time. A
compile that runs in parallel with 999 other compiles will have a weighted time
that is tiny."""
| 58 | |
Daniel Bratell | a10370c | 2018-06-11 07:58:59 +0000 | [diff] [blame] | 59 | import argparse |
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 60 | import errno |
| 61 | import os |
| 62 | import sys |
| 63 | |
| 64 | |
# The number of long build times to report (the N slowest steps by
# weighted duration):
long_count = 10
# The number of long times by extension (build-step type) to report:
long_ext_count = 5
| 69 | |
| 70 | |
class Target:
    """Represents a single line read from a .ninja_log file."""

    def __init__(self, start, end):
        """Creates a target object by passing in the start/end times in
        seconds as a float."""
        self.start = start
        self.end = end
        # A list of targets, appended to by the owner of this object.
        self.targets = []
        self.weighted_duration = 0.0

    def Duration(self):
        """Returns the task duration in seconds as a float."""
        return self.end - self.start

    def SetWeightedDuration(self, weighted_duration):
        """Sets the weighted duration, in seconds, passed in as a float."""
        self.weighted_duration = weighted_duration

    def WeightedDuration(self):
        """Returns the task's weighted duration in seconds as a float.

        Weighted_duration takes the elapsed time of the task and divides it
        by how many other tasks were running at the same time. Thus, it
        represents the approximate impact of this task on the total build
        time, with serialized or serializing steps typically ending up with
        much longer weighted durations.
        weighted_duration should always be the same or shorter than duration.
        """
        # Allow for modest floating-point errors.
        epsilon = 0.000002
        if self.weighted_duration > self.Duration() + epsilon:
            # Parenthesized print works under both Python 2 and Python 3
            # (the original `print '...'` statement is a SyntaxError in 3).
            print('%s > %s?' % (self.weighted_duration, self.Duration()))
        assert self.weighted_duration <= self.Duration() + epsilon
        return self.weighted_duration

    def DescribeTargets(self):
        """Returns a printable string that summarizes the targets."""
        if len(self.targets) == 1:
            return self.targets[0]
        # Some build steps generate dozens of outputs - handle them sanely.
        # It's a bit odd that if there are three targets we return all three
        # but if there are more than three we just return two, but this works
        # well in practice.
        elif len(self.targets) > 3:
            return '(%d items) ' % len(self.targets) + (
                ', '.join(self.targets[:2]) + ', ...')
        else:
            return ', '.join(self.targets)
| 120 | |
| 121 | |
# Copied with some modifications from ninjatracing
def ReadTargets(log, show_all):
    """Reads all targets from a .ninja_log file object |log|.

    Returns a list of Target objects, one per unique build step, in no
    particular order (callers sort as needed). Only the last build's steps
    are kept unless |show_all| is true."""
    header = log.readline()
    assert header == '# ninja log v5\n', \
        'unrecognized ninja log version %r' % header
    targets_dict = {}
    last_end_seen = 0.0
    for line in log:
        parts = line.strip().split('\t')
        if len(parts) != 5:
            # If ninja.exe is rudely halted then the .ninja_log file may be
            # corrupt. Silently continue.
            continue
        start, end, _, name, cmdhash = parts  # Ignore restat.
        # Convert from integral milliseconds to float seconds.
        start = int(start) / 1000.0
        end = int(end) / 1000.0
        if not show_all and end < last_end_seen:
            # An earlier time stamp means that this step is the first in a new
            # build, possibly an incremental build. Throw away the previous
            # data so that this new build will be displayed independently.
            # This has to be done by comparing end times because records are
            # written to the .ninja_log file when commands complete, so end
            # times are guaranteed to be in order, but start times are not.
            targets_dict = {}
        target = None
        if cmdhash in targets_dict:
            target = targets_dict[cmdhash]
            if not show_all and (target.start != start or target.end != end):
                # If several builds in a row just run one or two build steps
                # then the end times may not go backwards so the last build
                # may not be detected as such. However in many cases there
                # will be a build step repeated in the two builds and the
                # changed start/stop points for that command, identified by
                # the hash, can be used to detect and reset the target
                # dictionary.
                targets_dict = {}
                target = None
        if not target:
            targets_dict[cmdhash] = target = Target(start, end)
        last_end_seen = end
        target.targets.append(name)
    # Wrap in list() so the result is sortable/indexable under Python 3,
    # where dict.values() is a view; this is a no-op under Python 2.
    return list(targets_dict.values())
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 167 | |
| 168 | |
def GetExtension(target):
    """Return the file extension that best represents a target.

    For targets that generate multiple outputs it is important to return a
    consistent 'canonical' extension. Ultimately the goal is to group build
    steps by type."""
    # Default covers a target with no outputs at all; the original code left
    # `extension` unbound in that case and raised NameError.
    extension = '(no extension found)'
    for output in target.targets:
        # Normalize all mojo related outputs to 'mojo'.
        if '.mojom' in output:
            extension = 'mojo'
            break
        # Not a true extension, but a good grouping.
        if output.endswith('type_mappings'):
            extension = 'type_mappings'
            break
        extension = os.path.splitext(output)[1]
        if len(extension) == 0:
            extension = '(no extension found)'
        if extension in ['.pdb', '.dll', '.exe']:
            # Make sure that .dll and .exe are grouped together and that the
            # .dll.lib files don't cause these to be listed as libraries.
            extension = 'PEFile (linking)'
            break
        if extension in ['.so', '.TOC']:
            # Attempt to identify linking, avoid identifying as '.TOC'.
            extension = '.so (linking)'
            break
    return extension
| 197 | |
| 198 | |
def SummarizeEntries(entries):
    """Print a summary of the passed in list of Target objects."""

    # Create a list that is in order by time stamp and has entries for the
    # beginning and ending of each build step (one time stamp may have
    # multiple entries due to multiple steps starting/stopping at exactly the
    # same time). Iterate through this list, keeping track of which tasks are
    # running at all times. At each time step calculate a running total for
    # weighted time so that when each task ends its own weighted time can
    # easily be calculated.
    task_start_stop_times = []

    earliest = -1
    latest = 0
    total_cpu_time = 0
    for target in entries:
        if earliest < 0 or target.start < earliest:
            earliest = target.start
        if target.end > latest:
            latest = target.end
        total_cpu_time += target.Duration()
        task_start_stop_times.append((target.start, 'start', target))
        task_start_stop_times.append((target.end, 'stop', target))
    length = latest - earliest
    weighted_total = 0.0

    task_start_stop_times.sort()
    # Now we have all task start/stop times sorted by when they happen. If a
    # task starts and stops on the same time stamp then the start will come
    # first because of the alphabet, which is important for making this work
    # correctly.
    # Track the tasks which are currently running.
    running_tasks = {}
    # Record the time we have processed up to so we know how to calculate
    # time deltas.
    last_time = task_start_stop_times[0][0]
    # Track the accumulated weighted time so that it can efficiently be
    # added to individual tasks.
    last_weighted_time = 0.0
    # Scan all start/stop events.
    for event in task_start_stop_times:
        time, action_name, target = event
        # Accumulate weighted time up to now.
        num_running = len(running_tasks)
        if num_running > 0:
            # Update the total weighted time up to this moment.
            last_weighted_time += (time - last_time) / float(num_running)
        if action_name == 'start':
            # Record the total weighted task time when this task starts.
            running_tasks[target] = last_weighted_time
        if action_name == 'stop':
            # Record the change in the total weighted task time while this
            # task ran.
            weighted_duration = last_weighted_time - running_tasks[target]
            target.SetWeightedDuration(weighted_duration)
            weighted_total += weighted_duration
            del running_tasks[target]
        last_time = time
    assert len(running_tasks) == 0

    # Warn if the sum of weighted times is off by more than half a second.
    # All times are float seconds by this point (ReadTargets converts from
    # milliseconds), so the threshold is 0.5 - the previous value of 500
    # was a leftover from when times were in milliseconds and made this
    # check effectively dead.
    if abs(length - weighted_total) > 0.5:
        print('Discrepancy!!! Length = %.3f, weighted total = %.3f' % (
            length, weighted_total))

    # Print the slowest build steps (by weighted time).
    print('    Longest build steps:')
    entries.sort(key=lambda x: x.WeightedDuration())
    for target in entries[-long_count:]:
        print('      %8.1f weighted s to build %s (%.1f s CPU time)' % (
            target.WeightedDuration(),
            target.DescribeTargets(), target.Duration()))

    # Sum up the time by file extension/type of the output file.
    count_by_ext = {}
    time_by_ext = {}
    weighted_time_by_ext = {}
    # Scan through all of the targets to build up per-extension statistics.
    for target in entries:
        extension = GetExtension(target)
        time_by_ext[extension] = time_by_ext.get(extension,
                                                 0) + target.Duration()
        weighted_time_by_ext[extension] = weighted_time_by_ext.get(
            extension, 0) + target.WeightedDuration()
        count_by_ext[extension] = count_by_ext.get(extension, 0) + 1

    print('    Time by build-step type:')
    # Copy to a list with extension name and total time swapped, to (time,
    # ext).
    weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in
                                         weighted_time_by_ext.items())
    # Print the slowest build target types (by weighted time):
    for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
        print(('      %8.1f s weighted time to generate %d %s files '
               '(%1.1f s CPU time)') % (time, count_by_ext[extension],
                                        extension, time_by_ext[extension]))

    print('    %.1f s weighted time (%.1f s CPU time, %1.1fx parallelism)' % (
        length, total_cpu_time,
        total_cpu_time * 1.0 / length))
    print('    %d build steps completed, average of %1.2f/s' % (
        len(entries), len(entries) / (length)))
| 297 | |
| 298 | |
def main():
    """Parse arguments, locate the .ninja_log file and summarize it.

    Returns 0 on success, or errno.ENOENT if the log file can't be read."""
    log_file = '.ninja_log'
    parser = argparse.ArgumentParser()
    parser.add_argument('-C', dest='build_directory',
                        help='Build directory.')
    parser.add_argument('--log-file',
                        help="specific ninja log file to analyze.")
    # Unrecognized arguments (e.g. ninja targets passed through by
    # autoninja) are deliberately ignored.
    args, _extra_args = parser.parse_known_args()
    if args.build_directory:
        log_file = os.path.join(args.build_directory, log_file)
    if args.log_file:
        log_file = args.log_file

    try:
        with open(log_file, 'r') as log:
            entries = ReadTargets(log, False)
            SummarizeEntries(entries)
    except IOError:
        # Parenthesized print works under both Python 2 and Python 3.
        print('Log file %r not found, no build summary created.' % log_file)
        return errno.ENOENT
    # Explicit success code (sys.exit(None) was also 0, but this is clearer).
    return 0
| 319 | |
| 320 | |
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit code.
    sys.exit(main())