Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # Copyright 2021 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | """Run an equivalent to the backfill pipeline locally and generate diffs. |
| 6 | |
| 7 | Parse the actual current builder configurations from BuildBucket and run |
| 8 | the join_config_payloads.py script locally. Generate a diff that shows any |
| 9 | changes using the tip-of-tree code vs what's running in production. |
| 10 | """ |
| 11 | |
| 12 | import argparse |
| 13 | import collections |
Sean McAllister | 9b5a33e | 2021-02-26 10:53:54 -0700 | [diff] [blame] | 14 | import functools |
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 15 | import itertools |
| 16 | import json |
Sean McAllister | e820fc0 | 2021-03-20 18:34:16 -0600 | [diff] [blame] | 17 | import logging |
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 18 | import multiprocessing |
| 19 | import multiprocessing.pool |
| 20 | import os |
| 21 | import pathlib |
| 22 | import subprocess |
| 23 | import sys |
| 24 | import tempfile |
| 25 | import time |
| 26 | |
# Every location below is derived from the directory that holds this script
# and then resolved to an absolute path.
this_dir = pathlib.Path(os.path.abspath(__file__)).parent

# helper scripts that live next to this one in payload_utils/
join_script = this_dir.joinpath(
    "../payload_utils/join_config_payloads.py").resolve()
merge_script = this_dir.joinpath(
    "../payload_utils/aggregate_messages.py").resolve()

# sibling checkouts two levels up from this script
hwid_path = this_dir.joinpath("../../platform/chromeos-hwid/v3").resolve()
public_path = this_dir.joinpath("../../overlays").resolve()
private_path = this_dir.joinpath("../../private-overlays").resolve()
project_path = this_dir.joinpath("../../project").resolve()

# terminal escape sequence that clears the current line, followed by a
# carriage return to put the cursor back at column 0
CLEAR_LINE = "\033[2K\r"
| 38 | |
# Immutable record describing one backfill job. main() fills it in from one
# entry of the backfiller builder's "configs" property; hwid_key,
# public_model, private_repo and private_model may be None when the entry
# does not specify them.
BackfillConfig = collections.namedtuple(
    "BackfillConfig",
    "program project hwid_key public_model private_repo private_model",
)
| 48 | |
| 49 | |
class Spinner:
    """Status line on stderr: a message prefixed by a rotating glyph.

    Each call to tick() redraws the line with the next glyph; done() replaces
    the glyph with a check mark or a cross and ends the line.
    """

    def __init__(self, message):
        self.message = message
        # endless iterator over the four animation frames
        self.spin = itertools.cycle("◐◓◑◒")

    def tick(self):
        """Redraw the status line using the next animation frame."""
        frame = next(self.spin)
        sys.stderr.write(CLEAR_LINE + "[%c] %s" % (frame, self.message))

    def done(self, success=True):
        """Draw the final status line (✔ on success, ✘ otherwise)."""
        template = "[✔] %s\n" if success else "[✘] %s\n"
        sys.stderr.write(CLEAR_LINE + template % self.message)
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 65 | |
| 66 | |
def call_and_spin(message, stdin, *cmd):
    """Execute a command and print a nice status while we wait.

    The command runs on a single worker thread while the calling thread
    redraws a spinner on stderr roughly every 50ms until it completes.

    Args:
      message (str): message to print while we wait (along with spinner)
      stdin (str): text to send to the command's stdin (or None). The
        command is run in text mode, so this must be str rather than bytes.
      cmd ([str]): command and any options and arguments

    Return:
      tuple of (data, status) containing process stdout (as str) and the
      process exit status

    Raises:
      Exception: whatever subprocess.run raised if the command could not be
        run at all (for example OSError when the executable is missing). The
        status line is marked as failed before the exception propagates.
    """

    spinner = Spinner(message)
    spinner.tick()

    with multiprocessing.pool.ThreadPool(processes=1) as pool:
        result = pool.apply_async(
            subprocess.run,
            (cmd,),
            {
                'input': stdin,
                'capture_output': True,
                'text': True,
            },
        )

        while not result.ready():
            spinner.tick()
            time.sleep(0.05)

        try:
            process = result.get()
        except Exception:
            # subprocess.run itself failed; terminate the half-drawn spinner
            # line so the traceback does not get appended to it.
            spinner.done(False)
            raise

    spinner.done(process.returncode == 0)
    return process.stdout, process.returncode
| 97 | |
| 98 | |
def parse_build_property(build, name):
    """Look up one named property on a BuildBucket record.

    The record keeps all of its properties as a single JSON-encoded string
    under build["config"]["properties"]; that string is decoded and the
    requested key is looked up in the result.

    Args:
      build (dict): json object containing BuildBucket properties
      name (str): name of the property to look up

    Return:
      decoded property value, or None if the decoded properties have no
      entry called name
    """
    encoded = build["config"]["properties"]
    properties = json.loads(encoded)
    return properties.get(name)
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 113 | |
| 114 | |
def jqdiff(filea, fileb, filt="."):
    """Diff two json files using jq to get a semantic diff.

    Both inputs are normalised with `jq -S` (sorted keys) after applying the
    filter, and the normalised outputs are compared with `diff -u`.

    Args:
      filea (str or path-like): first file to compare
      fileb (str or path-like): second file to compare
      filt (str): if supplied, jq filter to apply to inputs before comparing.
        It is handed to jq as a single argument, so it needs no extra shell
        quoting.

    Return:
      unified diff between the normalised inputs (empty string if equal)

    Raises:
      OSError: if bash cannot be executed.
    """

    # Process substitution "<(...)" is a bash feature, so invoke bash
    # explicitly instead of relying on whatever /bin/sh happens to be. The
    # filter and file names are passed as positional parameters ($1..$3)
    # rather than pasted into the script, so spaces and quotes are safe.
    script = 'diff -u <(jq -S "$1" "$2") <(jq -S "$1" "$3")'
    process = subprocess.run(
        ["bash", "-c", script, "jqdiff", filt, str(filea), str(fileb)],
        text=True,
        capture_output=True,
    )
    return process.stdout
| 140 | |
| 141 | |
def run_backfill(config, logname=None, run_imported=True, run_joined=True):
    """Run a single backfill job, return diff of current and new output.

    Args:
      config: BackfillConfig instance for the backfill operation.
      logname: Filename to append stderr from the backfill to;
        default is to suppress the output
      run_imported: If True, generate a diff for the imported payload
      run_joined: If True, generate a diff for the joined payload

    Return:
      None if the program is skipped, otherwise a tuple of
      (key, diff_imported, diff_joined) where key is "<program>-<project>"
      and each diff is a string (empty when that diff was not generated).

    Raises:
      subprocess.CalledProcessError: if the join script exits non-zero.
    """

    def run_diff(cmd, current, output, logfile):
        """Execute cmd and diff the current and output files"""
        stderr = subprocess.DEVNULL
        if logfile is not None:
            logfile.write("running: {}\n".format(" ".join(map(str, cmd))))
            # flush our buffered line first so it lands ahead of whatever
            # the child process writes to the same file
            logfile.flush()
            stderr = logfile

        subprocess.run(cmd, stderr=stderr, check=True)

        # if one or the other file doesn't exist, return the other as a diff
        if current.exists() != output.exists():
            present = current if current.exists() else output
            return present.read_text()

        # otherwise run diff
        return jqdiff(current, output)

    #### start of function body

    # reef is currently broken because it _needs_ a real portage environment
    # to pull in common code.
    # TODO(https://crbug.com/1144956): fix when reef is corrected
    if config.program == "reef":
        return None

    # path to project repo and config bundle
    path_repo = project_path / config.program / config.project
    path_config = path_repo / "generated/config.jsonproto"

    # NOTE(review): "--l" is presumably an abbreviated log-level option of
    # join_config_payloads.py; kept exactly as it was -- confirm there.
    cmd = [join_script, "--l", "DEBUG"]
    cmd.extend(["--program-name", config.program])
    cmd.extend(["--project-name", config.project])

    if path_config.exists():
        cmd.extend(["--config-bundle", path_config])

    if config.hwid_key:
        cmd.extend(["--hwid", hwid_path / config.hwid_key])

    if config.public_model:
        cmd.extend(["--public-model", public_path / config.public_model])

    if config.private_model:
        overlay = config.private_repo.split('/')[-1]
        cmd.extend(
            ["--private-model", private_path / overlay / config.private_model])

    diff_imported = ""
    diff_joined = ""

    # Only open the log once we know there is work to do, and always close
    # it again. With no logname, stderr of the child goes to DEVNULL.
    logfile = open(logname, "a") if logname else None
    try:
        # create temporary directory for output
        with tempfile.TemporaryDirectory() as scratch:
            scratch = pathlib.Path(scratch)

            # generate diff of imported payloads
            if run_imported:
                path_imported_old = path_repo / "generated/imported.jsonproto"
                path_imported_new = scratch / "imported.jsonproto"

                diff_imported = run_diff(
                    cmd + ["--import-only", "--output", path_imported_new],
                    path_imported_old,
                    path_imported_new,
                    logfile,
                )

            # generate diff of joined payloads
            if run_joined and path_config.exists():
                path_joined_old = path_repo / "generated/joined.jsonproto"
                path_joined_new = scratch / "joined.jsonproto"

                diff_joined = run_diff(
                    cmd + ["--output", path_joined_new],
                    path_joined_old,
                    path_joined_new,
                    logfile,
                )
    finally:
        if logfile is not None:
            logfile.close()

    key = "{}-{}".format(config.program, config.project)
    return (key, diff_imported, diff_joined)
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 229 | |
| 230 | |
def _write_diffs(path, diffs):
    """Write one headed section per builder, sorted by name, to path."""
    with open(path, "w") as ofile:
        for name, result in sorted(diffs.items()):
            ofile.write("## ---------------------\n")
            ofile.write("## diff for {}\n".format(name))
            ofile.write("\n")
            ofile.write(result + "\n")


def run_backfills(args, configs):
    """Run backfill pipeline for each builder in configs.

    Generate an über diff showing the changes that the current ToT
    join_config_payloads code would generate vs what's currently committed.

    Write the result to the output file(s) specified on the command line:
    args.imported_diff always, args.joined_diff only when it was given.

    Args:
      args: command line arguments from argparse
      configs: list of BackfillConfig instances to execute

    Return:
      nothing
    """

    # only do the joined work when there is somewhere to write its diff
    want_joined = bool(args.joined_diff)

    kwargs = {"run_joined": want_joined}
    if args.logfile:
        # open and close the logfile to truncate it so backfills can append
        # We can't pickle the file object and send it as an argument with
        # multiprocessing, so this is a workaround for that limitation
        with open(args.logfile, "w"):
            pass
        kwargs["logname"] = args.logfile

    nproc = 32
    nconfig = len(configs)
    imported_diffs = {}
    joined_diffs = {}
    with multiprocessing.Pool(processes=nproc) as pool:
        results = pool.imap_unordered(
            functools.partial(run_backfill, **kwargs), configs, chunksize=1)
        for ii, result in enumerate(results, 1):
            sys.stderr.write(
                CLEAR_LINE + "[{}/{}] Processing backfills".format(ii, nconfig))

            # run_backfill returns None for builders it skips
            if result:
                key, imported, joined = result
                imported_diffs[key] = imported
                joined_diffs[key] = joined

    # end the status line so later output starts on a fresh line
    sys.stderr.write(CLEAR_LINE + "[✔] Processing backfills\n")

    # generate final über diff showing all the changes
    _write_diffs(args.imported_diff, imported_diffs)
    if want_joined:
        _write_diffs(args.joined_diff, joined_diffs)
Sean McAllister | ffce55f | 2021-02-22 20:08:18 -0700 | [diff] [blame] | 290 | |
| 291 | |
def main():
    """Fetch the backfiller builder configuration and diff every backfill.

    Parses the command line, asks BuildBucket (through the prpc tool) for the
    chromeos/infra/backfiller builder, turns each entry of its "configs"
    property into a BackfillConfig and hands the ones that exist locally to
    run_backfills. Exits with a non-zero status if the prpc call fails or the
    builder carries no "configs" property.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    parser.add_argument(
        "--imported-diff",
        type=str,
        required=True,
        help="target file for diff on imported.jsonproto payload",
    )

    parser.add_argument(
        "--joined-diff",
        type=str,
        help="target file for diff on joined.jsonproto payload",
    )

    parser.add_argument(
        "-l",
        "--logfile",
        type=str,
        help="target file to log output from backfills",
    )
    args = parser.parse_args()

    # query BuildBucket for current builder configurations in the infra bucket
    data, status = call_and_spin(
        "Listing backfill builder",
        json.dumps({
            "id": {
                "project": "chromeos",
                "bucket": "infra",
                "builder": "backfiller"
            }
        }),
        "prpc",
        "call",
        "cr-buildbucket.appspot.com",
        "buildbucket.v2.Builders.GetBuilder",
    )

    if status != 0:
        print(
            "Error executing prpc call to list builders. Try 'prpc login' first.",
            file=sys.stderr,
        )
        sys.exit(status)

    builder = json.loads(data)

    # parse_build_property returns None when the property is absent; report
    # that clearly instead of failing while iterating over None
    builder_configs = parse_build_property(builder, "configs")
    if builder_configs is None:
        print(
            "Backfiller builder has no 'configs' property, nothing to run.",
            file=sys.stderr,
        )
        sys.exit(1)

    # construct backfill config from the configured builder properties
    configs = []
    for builder_config in builder_configs:
        # treat a missing or null private_yaml entry the same way
        private_yaml = builder_config.get("private_yaml") or {}
        config = BackfillConfig(
            program=builder_config["program_name"],
            project=builder_config["project_name"],
            hwid_key=builder_config.get("hwid_key"),
            public_model=builder_config.get("public_yaml_path"),
            private_repo=private_yaml.get("repo"),
            private_model=private_yaml.get("path"),
        )

        path_repo = project_path / config.program / config.project
        if not path_repo.exists():
            logging.warning("%s/%s does not exist locally, skipping",
                            config.program, config.project)
            continue

        configs.append(config)

    run_backfills(args, configs)
| 365 | |
| 366 | |
# run main() only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()