Blame - scripts/analyze_bazel_exec_logs.py - chromium.googlesource.com/chromiumos/chromite

blob: 012ddc68066fd1c9731843269a5d623e32cb9332 [file] [log] [blame]

Tim Bain	0bff09b	2023-09-13 22:34:53 +0000	[diff] [blame^]	1	# Copyright 2023 The ChromiumOS Authors
				2	# Use of this source code is governed by a BSD-style license that can be
				3	# found in the LICENSE file.
				4
				5	"""Analyze the execution log files generated by Bazel."""
				6
				7	import logging
				8	import os
				9	from pathlib import Path
				10	import re
				11	import shutil
				12	from typing import List, Optional
				13
				14	from chromite.lib import commandline
				15
				16
				17	def translate_target_label_to_file_name(target_label: str) -> str:
				18	target_file_name = target_label
				19	target_file_name = target_file_name.replace("@", "at-")
				20	target_file_name = target_file_name.replace("/", "_")
				21	target_file_name = target_file_name.replace("~", "-")
				22	target_file_name = target_file_name.replace(".", "_")
				23	target_file_name = target_file_name.replace("(", "_")
				24	target_file_name = target_file_name.replace(")", "_")
				25	target_file_name = target_file_name.replace(" ", "_")
				26	target_file_name = target_file_name.replace(":", "___")
				27	return target_file_name
				28
				29
				30	def split_exec_log(input_file_path: Path, output_dir_path: Path) -> None:
				31	"""Split an execution log into separate files per action.
				32
				33	This function takes a text execution log from a Bazel invocation and
				34	breaks it apart into separate files under the provided output directory.
				35	This facilitates future comparisons between two execution logs, which is
				36	very painful when the content is in a single 100+ GB file.
				37
				38	Args:
				39	input_file_path: The Path to the input execution log file.
				40	output_dir_path: The Path to the dir to write output files.
				41	"""
				42	if output_dir_path.exists():
				43	shutil.rmtree(output_dir_path)
				44	output_dir_path.mkdir()
				45	target_label_pattern = re.compile(r'target_label: "(.*)"')
				46	listed_outputs_pattern = re.compile(r'listed_outputs: "(.*)"')
				47
				48	lines_for_target_file = []
				49	target_label = None
				50	action_file_path = None
				51	qualifier = None
				52
				53	logging.info("output_dir_path=%s", output_dir_path)
				54
				55	with open(input_file_path, "r", encoding="utf-8") as input_file:
				56	while True:
				57	line = input_file.readline()
				58	if not line:
				59	break
				60
				61	if line.startswith("-------"):
				62	if os.path.isfile(action_file_path):
				63	raise Exception(f"File {action_file_path} already exists.")
				64	action_subdir_path.mkdir(exist_ok=True)
				65
				66	with open(
				67	action_file_path, "w", encoding="utf-8"
				68	) as target_file:
				69	target_file.writelines(lines_for_target_file)
				70
				71	lines_for_target_file = []
				72	target_label = None
				73	action_subdir_path = None
				74	action_file_path = None
				75	qualifier = None
				76	continue
				77
				78	# Timing information is going to be inherently non-reproducible, so
				79	# exclude it.
				80	if "seconds: " in line or "nanos: " in line:
				81	continue
				82
				83	lines_for_target_file.append(line)
				84	if line.startswith("target_label: "):
				85	target_label = target_label_pattern.match(line).group(1)
				86	action_subdir_path = (
				87	output_dir_path
				88	/ translate_target_label_to_file_name(target_label)
				89	)
				90	action_file_path = (
				91	action_subdir_path
				92	/ translate_target_label_to_file_name(qualifier)
				93	)
				94	logging.info("action_file_path=%s", action_file_path)
				95	elif line.startswith("listed_outputs: "):
				96	# Only use the first listed_output for a file as its qualifier
				97	if not qualifier:
				98	match = listed_outputs_pattern.match(line)
				99	if match:
				100	qualifier = match.group(1)
				101
				102
				103	def analyze_exec_logs(exec_log1_path: Path, exec_log2_path: Path) -> None:
				104	"""Split up to two execution logs into separate files per action.
				105
				106	This function takes up to two text execution logs from Bazel invocations and
				107	breaks them apart into separate files, under output directories with names
				108	derived from the input filenames.
				109
				110	This facilitates future comparisons between two execution logs, which is
				111	very painful when the content is in a single 100+ GB file.
				112
				113	Args:
				114	exec_log1_path: The Path to the first input execution log file.
				115	exec_log2_path: The Path to the second input execution log file.
				116	"""
				117	exec_log1_output_dir_path = Path(
				118	str(exec_log1_path).split(".", maxsplit=1)[0] + "_actions"
				119	)
				120	split_exec_log(exec_log1_path, exec_log1_output_dir_path)
				121
				122	if exec_log2_path:
				123	exec_log2_output_dir_path = Path(
				124	str(exec_log2_path).split(".", maxsplit=1)[0] + "_actions"
				125	)
				126	split_exec_log(exec_log2_path, exec_log2_output_dir_path)
				127
				128
				129	def _get_parser() -> commandline.ArgumentParser:
				130	"""Build the argument parser."""
				131
				132	parser = commandline.ArgumentParser()
				133
				134	parser.add_argument(
				135	"--exec_log1",
				136	dest="exec_log1_filename",
				137	help="The path to the first exec log to analyze.",
				138	)
				139
				140	parser.add_argument(
				141	"--exec_log2",
				142	dest="exec_log2_filename",
				143	help="The path to the second exec log to analyze.",
				144	)
				145
				146	return parser
				147
				148
				149	def main(argv: Optional[List[str]]) -> Optional[int]:
				150	"""Main."""
				151	parser = _get_parser()
				152	opts = parser.parse_args(argv)
				153
				154	path1 = Path(opts.exec_log1_filename)
				155	path2 = None
				156	if opts.exec_log2_filename:
				157	path2 = Path(opts.exec_log2_filename)
				158	analyze_exec_logs(path1, path2)