blob: 012ddc68066fd1c9731843269a5d623e32cb9332 [file] [log] [blame]
Tim Bain0bff09b2023-09-13 22:34:53 +00001# Copyright 2023 The ChromiumOS Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Analyze the execution log files generated by Bazel."""
6
7import logging
8import os
9from pathlib import Path
10import re
11import shutil
12from typing import List, Optional
13
14from chromite.lib import commandline
15
16
def translate_target_label_to_file_name(target_label: str) -> str:
    """Rewrite a label-like string so it can be used as a file name.

    Every character in the table below is substituted in a single pass; all
    other characters are kept unchanged.

    Args:
        target_label: The string to convert.

    Returns:
        The converted string.
    """
    # None of the replacement strings contains a character that is itself
    # replaced, so one translate() pass gives the same result as applying the
    # substitutions one after another.
    substitutions = str.maketrans(
        {
            "@": "at-",
            "/": "_",
            "~": "-",
            ".": "_",
            "(": "_",
            ")": "_",
            " ": "_",
            ":": "___",
        }
    )
    return target_label.translate(substitutions)
28
29
def _write_action_file(
    output_dir_path: Path,
    target_label: Optional[str],
    qualifier: Optional[str],
    lines: List[str],
) -> None:
    """Write the collected lines of one action into its own file.

    The file is placed at
    <output_dir_path>/<translated target_label>/<translated qualifier>.

    Args:
        output_dir_path: The Path to the dir that holds all output files.
        target_label: The value of the action's target_label line, or None
            if the action had none.
        qualifier: The value of the action's first listed_outputs line, or
            None if the action had none.
        lines: The log lines that belong to the action.

    Raises:
        FileExistsError: If the file for this action already exists.
    """
    # Back-to-back separators or trailing blank lines carry no action.
    if not any(line.strip() for line in lines):
        return

    # Both pieces are needed to build the output path; without them the
    # entry cannot be named, so report it and move on instead of crashing
    # part-way through a very large log.
    if target_label is None or not qualifier:
        logging.warning(
            "Skipping action with no usable name "
            "(target_label=%r, first listed_outputs=%r).",
            target_label,
            qualifier,
        )
        return

    action_subdir_path = output_dir_path / translate_target_label_to_file_name(
        target_label
    )
    action_file_path = action_subdir_path / translate_target_label_to_file_name(
        qualifier
    )
    logging.info("action_file_path=%s", action_file_path)

    if os.path.isfile(action_file_path):
        raise FileExistsError(f"File {action_file_path} already exists.")
    action_subdir_path.mkdir(exist_ok=True)

    with open(action_file_path, "w", encoding="utf-8") as target_file:
        target_file.writelines(lines)


def split_exec_log(input_file_path: Path, output_dir_path: Path) -> None:
    """Split an execution log into separate files per action.

    This function takes a text execution log from a Bazel invocation and
    breaks it apart into separate files under the provided output directory.
    This facilitates future comparisons between two execution logs, which is
    very painful when the content is in a single 100+ GB file.

    Actions are delimited by lines starting with "-------". Each action is
    written to a file named after its first listed_outputs value, inside a
    directory named after its target_label value. Actions that lack either
    value are skipped with a warning. Lines containing "seconds: " or
    "nanos: " are left out of the output files.

    Args:
        input_file_path: The Path to the input execution log file.
        output_dir_path: The Path to the dir to write output files. If it
            already exists it is deleted and recreated.

    Raises:
        FileExistsError: If two actions map to the same output file.
    """
    if output_dir_path.exists():
        shutil.rmtree(output_dir_path)
    output_dir_path.mkdir()
    target_label_pattern = re.compile(r'target_label: "(.*)"')
    listed_outputs_pattern = re.compile(r'listed_outputs: "(.*)"')

    lines_for_target_file: List[str] = []
    target_label: Optional[str] = None
    qualifier: Optional[str] = None

    logging.info("output_dir_path=%s", output_dir_path)

    with open(input_file_path, "r", encoding="utf-8") as input_file:
        # Iterate lazily; the log may be far too large to hold in memory.
        for line in input_file:
            if line.startswith("-------"):
                # End of the current action: write it out and start afresh.
                _write_action_file(
                    output_dir_path,
                    target_label,
                    qualifier,
                    lines_for_target_file,
                )
                lines_for_target_file = []
                target_label = None
                qualifier = None
                continue

            # Timing information is going to be inherently non-reproducible,
            # so exclude it.
            if "seconds: " in line or "nanos: " in line:
                continue

            lines_for_target_file.append(line)
            if line.startswith("target_label: "):
                match = target_label_pattern.match(line)
                if match:
                    target_label = match.group(1)
            elif not qualifier and line.startswith("listed_outputs: "):
                # Only use the first listed_output for a file as its
                # qualifier.
                match = listed_outputs_pattern.match(line)
                if match:
                    qualifier = match.group(1)

    # The last action may not be followed by a separator line; make sure it
    # is not lost.
    _write_action_file(
        output_dir_path, target_label, qualifier, lines_for_target_file
    )
101
102
def _get_actions_dir_path(exec_log_path: Path) -> Path:
    """Return the "<name>_actions" dir path that sits next to an exec log.

    <name> is the log's file name up to (not including) its first ".".
    """
    # Only the final path component is inspected, so a "." in a parent
    # directory name (or a leading "..") cannot redirect the output dir.
    base_name = exec_log_path.name.split(".", maxsplit=1)[0]
    return exec_log_path.with_name(base_name + "_actions")


def analyze_exec_logs(
    exec_log1_path: Path, exec_log2_path: Optional[Path] = None
) -> None:
    """Split up to two execution logs into separate files per action.

    This function takes up to two text execution logs from Bazel invocations and
    breaks them apart into separate files, under output directories with names
    derived from the input filenames: each log is split into a directory
    located beside it and named "<file name up to its first dot>_actions".

    This facilitates future comparisons between two execution logs, which is
    very painful when the content is in a single 100+ GB file.

    Args:
        exec_log1_path: The Path to the first input execution log file.
        exec_log2_path: The Path to the second input execution log file, or
            None to process only the first one.
    """
    split_exec_log(exec_log1_path, _get_actions_dir_path(exec_log1_path))

    if exec_log2_path is not None:
        split_exec_log(exec_log2_path, _get_actions_dir_path(exec_log2_path))
127
128
def _get_parser() -> commandline.ArgumentParser:
    """Build the argument parser.

    Returns:
        A parser that accepts --exec_log1 (required) and --exec_log2
        (optional), stored as exec_log1_filename and exec_log2_filename.
    """

    parser = commandline.ArgumentParser()

    # The first log is always needed; marking it required gives a usage
    # error when it is omitted rather than a failure later on.
    parser.add_argument(
        "--exec_log1",
        dest="exec_log1_filename",
        required=True,
        help="The path to the first exec log to analyze.",
    )

    parser.add_argument(
        "--exec_log2",
        dest="exec_log2_filename",
        help="The path to the second exec log to analyze.",
    )

    return parser
147
148
def main(argv: Optional[List[str]]) -> Optional[int]:
    """Parse the command line and split the requested execution log(s).

    Args:
        argv: The command line arguments, passed to the parser's
            parse_args().

    Returns:
        None; the function falls off the end once the logs are processed.
    """
    opts = _get_parser().parse_args(argv)

    first_log = Path(opts.exec_log1_filename)
    # The second log is optional; pass None along when it was not supplied.
    second_log = (
        Path(opts.exec_log2_filename) if opts.exec_log2_filename else None
    )
    analyze_exec_logs(first_log, second_log)