Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # Copyright 2021 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | """Run an equivalent to the backfill pipeline locally and generate diffs. |
| 6 | |
| 7 | Parse the actual current builder configurations from BuildBucket and run |
| 8 | the join_config_payloads.py script locally. Generate a diff that shows any |
| 9 | changes using the tip-of-tree code vs what's running in production. |
| 10 | """ |
| 11 | |
| 12 | import argparse |
| 13 | import collections |
Sean McAllister | 9b5a33e | 2021-02-26 10:53:54 -0700 | [diff] [blame] | 14 | import functools |
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 15 | import itertools |
| 16 | import json |
Sean McAllister | e820fc0 | 2021-03-20 18:34:16 -0600 | [diff] [blame] | 17 | import logging |
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 18 | import multiprocessing |
| 19 | import multiprocessing.pool |
| 20 | import os |
| 21 | import pathlib |
| 22 | import subprocess |
| 23 | import sys |
| 24 | import tempfile |
| 25 | import time |
| 26 | |
# Every location below is computed relative to the directory holding this
# script and then normalized to an absolute path.
this_dir = pathlib.Path(os.path.abspath(__file__)).parent
hwid_path = this_dir.joinpath("../../platform/chromeos-hwid/v3").resolve()
join_script = this_dir.joinpath(
    "../payload_utils/join_config_payloads.py").resolve()
merge_script = this_dir.joinpath(
    "../payload_utils/aggregate_messages.py").resolve()
public_path = this_dir.joinpath("../../overlays").resolve()
private_path = this_dir.joinpath("../../private-overlays").resolve()
project_path = this_dir.joinpath("../../project").resolve()

# ANSI escape that erases the current terminal line, followed by a carriage
# return so the next write starts again at column 0
CLEAR_LINE = "\033[2K\r"
| 38 | |
# Immutable record holding the settings for a single backfill job.
BackfillConfig = collections.namedtuple(
    "BackfillConfig",
    (
        "program",  # program name, also a directory under project_path
        "project",  # project name, a directory under the program
        "hwid_key",  # file name looked up under hwid_path (optional)
        "public_model",  # path relative to public_path (optional)
        "private_repo",  # repo whose last path component names the overlay
        "private_model",  # path relative to the private overlay (optional)
    ),
)
| 48 | |
| 49 | |
class Spinner:
  """Simple class to print a message and update a little spinning icon.

  All output goes to stderr. Every update first clears the current terminal
  line, so successive calls redraw the status in place.
  """

  def __init__(self, message):
    """Create a spinner.

    Args:
      message (str): text displayed next to the icon
    """
    self.message = message
    self.spin = itertools.cycle("◐◓◑◒")

  def tick(self):
    """Redraw the status line with the next frame of the spinner."""
    sys.stderr.write(CLEAR_LINE + "[%c] %s" % (next(self.spin), self.message))

  def done(self, success=True):
    """Replace the spinner with a final mark and end the line.

    Args:
      success (bool): print a check mark if True, a cross otherwise
    """
    if success:
      sys.stderr.write(CLEAR_LINE + "[✔] %s\n" % self.message)
    else:
      # this branch used the bare name `message`, which is not defined here,
      # so reporting a failure raised NameError instead
      sys.stderr.write(CLEAR_LINE + "[✘] %s\n" % self.message)
| 65 | |
| 66 | |
def call_and_spin(message, stdin, *cmd):
  """Run a command on a worker thread while animating a spinner.

  Args:
    message (str): status text shown beside the spinner while waiting
    stdin (str): text to feed to the command's standard input (or None);
      the command is run in text mode
    cmd ([str]): command and any options and arguments

  Return:
    tuple of (data, status) containing process stdout and status
  """

  run_options = {
      'input': stdin,
      'capture_output': True,
      'text': True,
  }

  with multiprocessing.pool.ThreadPool(processes=1) as workers:
    # subprocess.run blocks, so it runs on the worker thread and this thread
    # is free to animate the spinner
    pending = workers.apply_async(subprocess.run, (cmd,), run_options)

    progress = Spinner(message)
    progress.tick()

    while not pending.ready():
      progress.tick()
      time.sleep(0.05)

    finished = pending.get()
    exit_code = finished.returncode
    progress.done(exit_code == 0)

    return finished.stdout, exit_code
| 97 | |
| 98 | |
def parse_build_property(build, name):
  """Parse out a property value from a build and return its value.

  Properties are stored as a list of "<name>:<json value>" strings, so we
  find the entry for the requested name and decode the JSON that follows
  the separator.

  Args:
    build (dict): json object containing BuildBucket properties
    name (str): name of the property to look up

  Return:
    decoded property value or None if not found

  Raises:
    KeyError: if build has no config/recipe/propertiesJ entry
    json.JSONDecodeError: if the matching property value is not valid JSON
  """

  # Include the separator in the prefix so that looking up e.g. "hwid" cannot
  # match a longer key such as "hwid_key".
  prefix = name + ":"

  properties = build["config"]["recipe"]["propertiesJ"]
  for prop in properties:
    if prop.startswith(prefix):
      return json.loads(prop[len(prefix):])
  return None
| 118 | |
| 119 | |
def jqdiff(filea, fileb, filt="."):
  """Diff two json files using jq to get a semantic diff.

  Both inputs are passed through `jq -S` (sorted keys) with the given filter
  and the results are compared with `diff -u`.

  Args:
    filea (str): first file to compare
    fileb (str): second file to compare
    filt (str): if supplied, jq filter to apply to inputs before comparing.
      It is handed to jq verbatim as a single argument.

  Return:
    diff between inputs (stdout of diff, empty if the inputs are equal)
  """

  # Process substitution is a bash feature, so run bash explicitly instead of
  # relying on whatever /bin/sh happens to be. The filter and file names are
  # passed as positional parameters ($1..$3) rather than pasted into the
  # script, so spaces or quotes in them are never parsed as shell syntax.
  script = 'diff -u <(jq -S "$1" "$2") <(jq -S "$1" "$3")'
  process = subprocess.run(
      ["bash", "-c", script, "jqdiff", filt,
       str(filea), str(fileb)],
      text=True,
      capture_output=True,
  )
  return process.stdout
| 145 | |
| 146 | |
def run_backfill(config, logname=None, run_imported=True, run_joined=True):
  """Run a single backfill job, return diff of current and new output.

  Args:
    config: BackfillConfig instance for the backfill operation.
    logname: Filename to append stderr from the backfill to,
      default is to suppress the output
    run_imported: If True, generate a diff for the imported payload
    run_joined: If True, generate a diff for the joined payload

  Return:
    tuple of ("<program>-<project>", imported diff, joined diff); a diff that
    was not generated is the empty string. None if the config is skipped.

  Raises:
    subprocess.CalledProcessError: if the join script exits non-zero
  """

  def run_diff(cmd, current, output, logfile):
    """Execute cmd and diff the current and output files"""
    logfile.write("running: {}\n".format(" ".join(map(str, cmd))))
    # the child writes straight to the file descriptor, so push our line out
    # first to keep the log in order
    logfile.flush()

    subprocess.run(cmd, stderr=logfile, check=True)

    # if one or the other file doesn't exist, return the other as a diff
    if current.exists() != output.exists():
      existing = current if current.exists() else output
      with open(existing) as ifile:
        return ifile.read()

    # otherwise run diff
    return jqdiff(current, output)

  #### start of function body

  # reef is currently broken because it _needs_ a real portage environment
  # to pull in common code.
  # TODO(https://crbug.com/1144956): fix when reef is corrected
  if config.program == "reef":
    return None

  cmd = [join_script, "--l", "DEBUG"]
  cmd.extend(["--program-name", config.program])
  cmd.extend(["--project-name", config.project])

  if config.hwid_key:
    cmd.extend(["--hwid", hwid_path / config.hwid_key])

  if config.public_model:
    cmd.extend(["--public-model", public_path / config.public_model])

  if config.private_model:
    overlay = config.private_repo.split('/')[-1]
    cmd.extend(
        ["--private-model", private_path / overlay / config.private_model])

  # path to project repo and config bundle
  path_repo = project_path / config.program / config.project
  path_config = path_repo / "generated/config.jsonproto"

  diff_imported = ""
  diff_joined = ""

  # Always hand run_diff a real file object: with no logname the output goes
  # to the null device. (subprocess.DEVNULL is a plain integer and cannot be
  # written to from Python.) The context manager also closes the log again.
  with open(logname or os.devnull, "a") as logfile:
    # create temporary directory for output
    with tempfile.TemporaryDirectory() as scratch:
      scratch = pathlib.Path(scratch)

      # generate diff of imported payloads
      if run_imported:
        path_imported_old = path_repo / "generated/imported.jsonproto"
        path_imported_new = scratch / "imported.jsonproto"

        diff_imported = run_diff(
            cmd + ["--output", path_imported_new],
            path_imported_old,
            path_imported_new,
            logfile,
        )

      # generate diff of joined payloads
      if run_joined and path_config.exists():
        path_joined_old = path_repo / "generated/joined.jsonproto"
        path_joined_new = scratch / "joined.jsonproto"

        diff_joined = run_diff(
            cmd + ["--config-bundle", path_config, "--output", path_joined_new],
            path_joined_old,
            path_joined_new,
            logfile,
        )

  key = "{}-{}".format(config.program, config.project)
  return (key, diff_imported, diff_joined)
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 234 | |
| 235 | |
def _write_diffs(path, diffs):
  """Write every diff in diffs (name -> text) to path, sorted by name."""
  with open(path, "w") as ofile:
    for name, result in sorted(diffs.items()):
      ofile.write("## ---------------------\n")
      ofile.write("## diff for {}\n".format(name))
      ofile.write("\n")
      ofile.write(result + "\n")


def run_backfills(args, configs):
  """Run backfill pipeline for each builder in configs.

  Generate an über diff showing the changes that the current ToT
  join_config_payloads code would generate vs what's currently committed.

  Write the result to the output file(s) specified on the command line.

  Args:
    args: command line arguments from argparse; reads imported_diff,
      joined_diff and logfile
    configs: list of BackfillConfig instances to execute

  Return:
    nothing
  """

  # only spend time on the joined payload if there is somewhere to write it
  want_joined = bool(args.joined_diff)

  kwargs = {"run_joined": want_joined}

  # create a logfile if requested
  if args.logfile:
    # open and close the logfile to truncate it so backfills can append
    # We can't pickle the file object and send it as an argument with
    # multiprocessing, so this is a workaround for that limitation
    with open(args.logfile, "w"):
      pass
    kwargs["logname"] = args.logfile

  nproc = 32
  nconfig = len(configs)
  imported_diffs = {}
  joined_diffs = {}
  with multiprocessing.Pool(processes=nproc) as pool:
    results = pool.imap_unordered(
        functools.partial(run_backfill, **kwargs), configs, chunksize=1)
    for ii, result in enumerate(results, 1):
      sys.stderr.write(
          CLEAR_LINE + "[{}/{}] Processing backfills".format(ii, nconfig))

      # skipped configs come back as None
      if result:
        key, imported, joined = result
        imported_diffs[key] = imported
        joined_diffs[key] = joined

  # end the status line so later terminal output starts on a fresh line
  sys.stderr.write(CLEAR_LINE + "[✔] Processing backfills\n")

  # generate final über diff showing all the changes
  _write_diffs(args.imported_diff, imported_diffs)
  if want_joined:
    _write_diffs(args.joined_diff, joined_diffs)
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 295 | |
| 296 | |
def main():
  """Parse arguments, list the backfill builders and diff each of them.

  Exits with the status of the prpc call if the builders can't be listed.
  """
  parser = argparse.ArgumentParser(
      description=__doc__,
      formatter_class=argparse.RawTextHelpFormatter,
  )

  parser.add_argument(
      "--imported-diff",
      type=str,
      required=True,
      help="target file for diff on imported.jsonproto payload",
  )

  parser.add_argument(
      "--joined-diff",
      type=str,
      help="target file for diff on joined.jsonproto payload",
  )

  parser.add_argument(
      "-l",
      "--logfile",
      type=str,
      help="target file to log output from backfills",
  )
  args = parser.parse_args()

  # query BuildBucket for current builder configurations in the infra bucket
  data, status = call_and_spin(
      "Listing backfill builders",
      json.dumps({
          "project": "chromeos",
          "bucket": "infra",
          "pageSize": 1000,
      }),
      "prpc",
      "call",
      "cr-buildbucket.appspot.com",
      "buildbucket.v2.Builders.ListBuilders",
  )

  if status != 0:
    print(
        "Error executing prpc call to list builders. Try 'prpc login' first.",
        file=sys.stderr,
    )
    sys.exit(status)

  # filter out just the backfill builders; tolerate a response that has no
  # "builders" key at all instead of failing with a KeyError
  builders = [
      bb for bb in json.loads(data).get("builders", [])
      if bb["id"]["builder"].startswith("backfill")
  ]

  # construct backfill config from the configured builder properties
  configs = []
  for builder in builders:
    public_yaml = parse_build_property(builder, "public_yaml") or {}
    private_yaml = parse_build_property(builder, "private_yaml") or {}

    config = BackfillConfig(
        program=parse_build_property(builder, "program_name"),
        project=parse_build_property(builder, "project_name"),
        hwid_key=parse_build_property(builder, "hwid_key"),
        public_model=public_yaml.get("path"),
        private_repo=private_yaml.get("repo"),
        private_model=private_yaml.get("path"),
    )

    # without both names there is no project directory to look at
    if not config.program or not config.project:
      logging.warning(
          "builder %s has no program/project name, skipping",
          builder["id"]["builder"],
      )
      continue

    path_repo = project_path / config.program / config.project
    if not path_repo.exists():
      logging.warning("%s/%s does not exist locally, skipping",
                      config.program, config.project)
      continue

    configs.append(config)

  run_backfills(args, configs)


if __name__ == "__main__":
  main()