Edward Lemur | 59a3b2f | 2020-01-14 01:50:50 +0000 | [diff] [blame] | 1 | #!/usr/bin/env vpython |
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 2 | # Copyright (c) 2018 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Summarize the last ninja build, invoked with ninja's -C syntax. |
| 7 | |
| 8 | This script is designed to be automatically run after each ninja build in |
| 9 | order to summarize the build's performance. Making build performance information |
| 10 | more visible should make it easier to notice anomalies and opportunities. To use |
Bruce Dawson | e186e50 | 2018-02-12 15:41:11 -0800 | [diff] [blame] | 11 | this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat. |
| 12 | |
| 13 | On Linux you can get autoninja to invoke this script using this syntax: |
| 14 | |
| 15 | $ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome |
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 16 | |
| 17 | You can also call this script directly using ninja's syntax to specify the |
| 18 | output directory of interest: |
| 19 | |
| 20 | > python post_build_ninja_summary.py -C out/Default |
| 21 | |
| 22 | Typical output looks like this: |
| 23 | |
| 24 | >ninja -C out\debug_component base |
| 25 | ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp |
| 26 | ninja: Entering directory `out\debug_component' |
| 27 | [1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files |
Bruce Dawson | 0e9afd2 | 2019-11-08 18:57:08 +0000 | [diff] [blame] | 28 | Longest build steps: |
| 29 | 0.1 weighted s to build obj/base/base/trace_log.obj (6.7 s elapsed time) |
| 30 | 0.2 weighted s to build nasm.exe, nasm.exe.pdb (0.2 s elapsed time) |
| 31 | 0.3 weighted s to build obj/base/base/win_util.obj (12.4 s elapsed time) |
| 32 | 1.2 weighted s to build base.dll, base.dll.lib (1.2 s elapsed time) |
| 33 | Time by build-step type: |
| 34 | 0.0 s weighted time to generate 6 .lib files (0.3 s elapsed time sum) |
| 35 | 0.1 s weighted time to generate 25 .stamp files (1.2 s elapsed time sum) |
| 36 | 0.2 s weighted time to generate 20 .o files (2.8 s elapsed time sum) |
| 37 | 1.7 s weighted time to generate 4 PEFile (linking) files (2.0 s elapsed |
| 38 | time sum) |
| 39 | 23.9 s weighted time to generate 770 .obj files (974.8 s elapsed time sum) |
| 40 | 26.1 s weighted time (982.9 s elapsed time sum, 37.7x parallelism) |
| 41 | 839 build steps completed, average of 32.17/s |
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 42 | |
| 43 | If no gn clean has been done then results will be for the last non-NULL |
| 44 | invocation of ninja. Ideas for future statistics, and implementations are |
| 45 | appreciated. |
| 46 | |
| 47 | The "weighted" time is the elapsed time of each build step divided by the number |
| 48 | of tasks that were running in parallel. This makes it an excellent approximation |
| 49 | of how "important" a slow step was. A link that is entirely or mostly serialized |
| 50 | will have a weighted time that is the same or similar to its elapsed time. A |
| 51 | compile that runs in parallel with 999 other compiles will have a weighted time |
| 52 | that is tiny.""" |
| 53 | |
Raul Tambre | 80ee78e | 2019-05-06 22:41:05 +0000 | [diff] [blame] | 54 | from __future__ import print_function |
| 55 | |
Daniel Bratell | a10370c | 2018-06-11 07:58:59 +0000 | [diff] [blame] | 56 | import argparse |
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 57 | import errno |
Bruce Dawson | 34d90be | 2020-03-16 23:08:05 +0000 | [diff] [blame^] | 58 | import fnmatch |
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 59 | import os |
| 60 | import sys |
| 61 | |
| 62 | |
# The number of slowest individual build steps (ranked by weighted time) to
# report.
long_count = 10
# The number of slowest build-step types (grouped by extension, ranked by
# weighted time) to report. main() increases this by one for every extra step
# type that is requested.
long_ext_count = 5
| 67 | |
| 68 | |
class Target:
  """One build step parsed from a .ninja_log file.

  A single step may produce several output files; their names are collected
  in |targets| by the code that creates the object.
  """
  def __init__(self, start, end):
    """Stores the step's start and end times, given in seconds as floats."""
    self.start = start
    self.end = end
    # Output names for this step, appended to by the owner of this object.
    self.targets = []
    self.weighted_duration = 0.0

  def Duration(self):
    """Returns the elapsed time of the step in seconds as a float."""
    return self.end - self.start

  def SetWeightedDuration(self, weighted_duration):
    """Stores the weighted duration, in seconds, passed in as a float."""
    self.weighted_duration = weighted_duration

  def WeightedDuration(self):
    """Returns the step's weighted duration in seconds as a float.

    The weighted duration is the elapsed time of the step divided by the
    number of steps that were running at the same time, so it approximates
    how much this step contributed to the total build time. Serialized or
    serializing steps therefore tend to have much longer weighted durations.
    The weighted duration should never exceed the plain duration; this is
    reported and asserted here.
    """
    # Tolerate small floating-point rounding errors.
    epsilon = 0.000002
    upper_bound = self.Duration() + epsilon
    if self.weighted_duration > upper_bound:
      print('%s > %s?' % (self.weighted_duration, self.Duration()))
    assert self.weighted_duration <= upper_bound
    return self.weighted_duration

  def DescribeTargets(self):
    """Returns a printable string that summarizes the step's outputs."""
    count = len(self.targets)
    if count == 1:
      return self.targets[0]
    if count <= 3:
      return ', '.join(self.targets)
    # Some build steps generate dozens of outputs, so only the first two are
    # listed. It is slightly odd that three outputs are listed in full while
    # four or more are cut down to two, but this works well in practice.
    shown = ', '.join(self.targets[:2])
    return '(%d items) ' % count + shown + ', ...'
| 118 | |
| 119 | |
# Copied with some modifications from ninjatracing
def ReadTargets(log, show_all):
  """Reads all build steps from the open .ninja_log file object |log|.

  Args:
    log: an open text file object positioned at the start of a .ninja_log
        file. The first line must be the '# ninja log v5' header.
    show_all: if false, records from earlier builds are discarded whenever a
        new build is detected, so only the most recent build is returned. If
        true, nothing is discarded.

  Returns:
    A list of Target objects, one per command hash. The list is not sorted.
    Outputs that share a command hash are collected in one Target's |targets|.
  """
  header = log.readline()
  assert header == '# ninja log v5\n', \
         'unrecognized ninja log version %r' % header
  targets_dict = {}
  last_end_seen = 0.0
  for line in log:
    parts = line.strip().split('\t')
    if len(parts) != 5:
      # If ninja.exe is rudely halted then the .ninja_log file may be
      # corrupt. Silently continue.
      continue
    start, end, _, name, cmdhash = parts  # Ignore restat.
    try:
      # Convert from integral milliseconds to float seconds.
      start = int(start) / 1000.0
      end = int(end) / 1000.0
    except ValueError:
      # Non-numeric time stamps are another symptom of a corrupt log line;
      # skip them just like lines with the wrong number of fields.
      continue
    if not show_all and end < last_end_seen:
      # An earlier time stamp means that this step is the first in a new
      # build, possibly an incremental build. Throw away the previous
      # data so that this new build will be displayed independently.
      # This has to be done by comparing end times because records are
      # written to the .ninja_log file when commands complete, so end
      # times are guaranteed to be in order, but start times are not.
      targets_dict = {}
    target = None
    if cmdhash in targets_dict:
      target = targets_dict[cmdhash]
      if not show_all and (target.start != start or target.end != end):
        # If several builds in a row just run one or two build steps then
        # the end times may not go backwards so the last build may not be
        # detected as such. However in many cases there will be a build step
        # repeated in the two builds and the changed start/stop points for
        # that command, identified by the hash, can be used to detect and
        # reset the target dictionary.
        targets_dict = {}
        target = None
    if not target:
      targets_dict[cmdhash] = target = Target(start, end)
    last_end_seen = end
    target.targets.append(name)
  # Return a real list: on Python 3 dict.values() is a view that cannot be
  # sorted in place, which callers of this function do.
  return list(targets_dict.values())
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 165 | |
| 166 | |
def GetExtension(target, extra_patterns):
  """Return the file extension that best represents a target.

  For targets that generate multiple outputs it is important to return a
  consistent 'canonical' extension. Ultimately the goal is to group build steps
  by type.

  Args:
    target: an object whose |targets| attribute is a list of output names.
    extra_patterns: a semicolon separated string of fnmatch patterns, or
        None/empty. If any output contains a match for one of the patterns
        then that pattern is returned as the grouping. Empty patterns (for
        example from a trailing ';') are ignored.

  Returns:
    A string naming the group: a matching extra pattern, a file extension such
    as '.obj', or one of the synthetic names 'type_mappings',
    'PEFile (linking)', '.so (linking)', 'mojo' or '(no extension found)'.
  """
  # Split once rather than once per output, and drop empty patterns: an empty
  # pattern would expand to '**', match every output and group everything
  # under the empty string.
  patterns = []
  if extra_patterns:
    patterns = [p for p in extra_patterns.split(';') if p]
  # Used when the target has no outputs at all.
  extension = '(no extension found)'
  for output in target.targets:
    for fn_pattern in patterns:
      if fnmatch.fnmatch(output, '*' + fn_pattern + '*'):
        return fn_pattern
    # Not a true extension, but a good grouping.
    if output.endswith('type_mappings'):
      extension = 'type_mappings'
      break
    extension = os.path.splitext(output)[1]
    if not extension:
      extension = '(no extension found)'
    if extension in ['.pdb', '.dll', '.exe']:
      extension = 'PEFile (linking)'
      # Make sure that .dll and .exe are grouped together and that the
      # .dll.lib files don't cause these to be listed as libraries
      break
    if extension in ['.so', '.TOC']:
      extension = '.so (linking)'
      # Attempt to identify linking, avoid identifying as '.TOC'
      break
    # Make sure .obj files don't get categorized as mojo files
    if extension in ['.obj', '.o']:
      break
    # Normalize all mojo related outputs to 'mojo'.
    if output.count('.mojom') > 0:
      extension = 'mojo'
      break
  return extension
| 202 | |
| 203 | |
def SummarizeEntries(entries, extra_step_types):
  """Print a summary of the passed in Target objects.

  Computes a weighted duration for every target (its elapsed time divided by
  the number of steps running concurrently), then prints the slowest steps,
  the slowest step types and overall totals.

  Args:
    entries: an iterable of Target objects. It is copied, so the caller's
        container is not reordered. Each target's weighted duration is set.
    extra_step_types: semicolon separated fnmatch patterns passed through to
        GetExtension() for grouping, or None.
  """
  # Work on a private list so that any iterable (including a Python 3 dict
  # view, which has no sort method) is accepted.
  entries = list(entries)
  if not entries:
    # Nothing to summarize; the code below needs at least one build step.
    print(' No build steps found, no build summary created.')
    return

  # Create a list that is in order by time stamp and has entries for the
  # beginning and ending of each build step (one time stamp may have multiple
  # entries due to multiple steps starting/stopping at exactly the same time).
  # Iterate through this list, keeping track of which tasks are running at all
  # times. At each time step calculate a running total for weighted time so
  # that when each task ends its own weighted time can easily be calculated.
  task_start_stop_times = []

  earliest = -1
  latest = 0
  total_cpu_time = 0
  for target in entries:
    if earliest < 0 or target.start < earliest:
      earliest = target.start
    if target.end > latest:
      latest = target.end
    total_cpu_time += target.Duration()
    task_start_stop_times.append((target.start, 'start', target))
    task_start_stop_times.append((target.end, 'stop', target))
  length = latest - earliest
  weighted_total = 0.0

  # Sort on (time, action) only. Sorting the whole tuple would fall back to
  # comparing Target objects when two events tie, and Target defines no
  # ordering, which raises TypeError on Python 3.
  task_start_stop_times.sort(key=lambda event: event[:2])
  # Now we have all task start/stop times sorted by when they happen. If a
  # task starts and stops on the same time stamp then the start will come
  # first because of the alphabet, which is important for making this work
  # correctly.
  # Track the tasks which are currently running.
  running_tasks = {}
  # Record the time we have processed up to so we know how to calculate time
  # deltas.
  last_time = task_start_stop_times[0][0]
  # Track the accumulated weighted time so that it can efficiently be added
  # to individual tasks.
  last_weighted_time = 0.0
  # Scan all start/stop events.
  for event in task_start_stop_times:
    time, action_name, target = event
    # Accumulate weighted time up to now.
    num_running = len(running_tasks)
    if num_running > 0:
      # Update the total weighted time up to this moment.
      last_weighted_time += (time - last_time) / float(num_running)
    if action_name == 'start':
      # Record the total weighted task time when this task starts.
      running_tasks[target] = last_weighted_time
    if action_name == 'stop':
      # Record the change in the total weighted task time while this task ran.
      weighted_duration = last_weighted_time - running_tasks[target]
      target.SetWeightedDuration(weighted_duration)
      weighted_total += weighted_duration
      del running_tasks[target]
    last_time = time
  assert(len(running_tasks) == 0)

  # Warn if the sum of weighted times is off by more than half a second.
  # All times here are in seconds, so the threshold is 0.5.
  if abs(length - weighted_total) > 0.5:
    print('Discrepancy!!! Length = %.3f, weighted total = %.3f' % (
          length, weighted_total))

  # Print the slowest build steps (by weighted time).
  print(' Longest build steps:')
  entries.sort(key=lambda x: x.WeightedDuration())
  for target in entries[-long_count:]:
    print(' %8.1f weighted s to build %s (%.1f s elapsed time)' % (
          target.WeightedDuration(),
          target.DescribeTargets(), target.Duration()))

  # Sum up the time by file extension/type of the output file
  count_by_ext = {}
  time_by_ext = {}
  weighted_time_by_ext = {}
  # Scan through all of the targets to build up per-extension statistics.
  for target in entries:
    extension = GetExtension(target, extra_step_types)
    time_by_ext[extension] = time_by_ext.get(extension, 0) + target.Duration()
    weighted_time_by_ext[extension] = weighted_time_by_ext.get(extension,
            0) + target.WeightedDuration()
    count_by_ext[extension] = count_by_ext.get(extension, 0) + 1

  print(' Time by build-step type:')
  # Copy to a list with extension name and total time swapped, to (time, ext)
  weighted_time_by_ext_sorted = sorted((y, x) for (x, y) in
                                        weighted_time_by_ext.items())
  # Print the slowest build target types (by weighted time):
  for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
    print(' %8.1f s weighted time to generate %d %s files '
          '(%1.1f s elapsed time sum)' % (time, count_by_ext[extension],
                                        extension, time_by_ext[extension]))

  # A build whose steps all start and end on the same time stamp has zero
  # length; report zero rates rather than dividing by zero.
  if length > 0:
    parallelism = total_cpu_time * 1.0 / length
    steps_per_second = len(entries) / (length)
  else:
    parallelism = 0.0
    steps_per_second = 0.0
  print(' %.1f s weighted time (%.1f s elapsed time sum, %1.1fx '
        'parallelism)' % (length, total_cpu_time, parallelism))
  print(' %d build steps completed, average of %1.2f/s' % (
        len(entries), steps_per_second))
Bruce Dawson | ffc0c7c | 2018-02-07 18:00:48 -0800 | [diff] [blame] | 302 | |
| 303 | |
def main():
  """Parses the command line, then reads and summarizes a ninja log.

  The log analyzed is --log-file if given, otherwise .ninja_log inside the -C
  build directory, otherwise .ninja_log in the current directory. Unknown
  arguments are ignored.

  Returns errno.ENOENT if an IOError is raised while reading or summarizing
  the log, and None otherwise.
  """
  global long_ext_count

  parser = argparse.ArgumentParser()
  parser.add_argument('-C', dest='build_directory',
                      help='Build directory.')
  parser.add_argument(
      '-s',
      '--step-types',
      help='semicolon separated fnmatch patterns for build-step grouping')
  parser.add_argument('--log-file',
                      help="specific ninja log file to analyze.")
  args, _extra_args = parser.parse_known_args()

  # An explicit --log-file wins over the -C build directory.
  if args.log_file:
    log_file = args.log_file
  elif args.build_directory:
    log_file = os.path.join(args.build_directory, '.ninja_log')
  else:
    log_file = '.ninja_log'

  # Offer a convenient way to add extra step types automatically, including
  # when this script is run by autoninja. get() returns None if the variable
  # isn't set.
  step_types = args.step_types or os.environ.get('chromium_step_types')
  if step_types:
    # Make room for the extra build types.
    long_ext_count += len(step_types.split(';'))

  try:
    with open(log_file, 'r') as log:
      entries = ReadTargets(log, False)
      SummarizeEntries(entries, step_types)
  except IOError:
    print('Log file %r not found, no build summary created.' % log_file)
    return errno.ENOENT
| 337 | |
| 338 | |
# Run the summary only when executed as a script. The value returned by main()
# (None, or errno.ENOENT when an IOError was caught) is passed to sys.exit().
if __name__ == '__main__':
  sys.exit(main())