Edward Lemur | 352808f | 2019-10-08 17:51:05 +0000 | [diff] [blame] | 1 | #!/usr/bin/env vpython3 |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 2 | # Copyright 2013 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 5 | """Usage: %prog [options] [<commitref>]* |
| 6 | |
| 7 | If no <commitref>'s are supplied, it defaults to HEAD. |
| 8 | |
| 9 | Calculates the generation number for one or more commits in a git repo. |
| 10 | |
| 11 | Generation number of a commit C with parents P is defined as: |
| 12 | generation_number(C, []) = 0 |
| 13 | generation_number(C, P) = max(map(generation_number, P)) + 1 |
| 14 | |
| 15 | This number can be used to order commits relative to each other, as long as for |
| 16 | any pair of the commits, one is an ancestor of the other. |
| 17 | |
| 18 | Since calculating the generation number of a commit requires walking that |
| 19 | commit's entire history, this script caches all calculated data inside the git |
| 20 | repo that it operates on in the ref 'refs/number/commits'. |
| 21 | """ |
| 22 | |
| 23 | import binascii |
| 24 | import collections |
| 25 | import logging |
| 26 | import optparse |
| 27 | import os |
| 28 | import struct |
| 29 | import sys |
| 30 | import tempfile |
| 31 | |
| 32 | import git_common as git |
| 33 | import subprocess2 |
| 34 | |
| 35 | CHUNK_FMT = '!20sL' |
| 36 | CHUNK_SIZE = struct.calcsize(CHUNK_FMT) |
| 37 | DIRTY_TREES = collections.defaultdict(int) |
| 38 | REF = 'refs/number/commits' |
nodir@chromium.org | ee74070 | 2014-04-03 01:43:32 +0000 | [diff] [blame] | 39 | AUTHOR_NAME = 'git-number' |
| 40 | AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com' |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 41 | |
| 42 | # Number of bytes to use for the prefix on our internal number structure. |
Quinten Yearsley | 925cedb | 2020-04-13 17:49:39 +0000 | [diff] [blame] | 43 | # 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 44 | # need to reimplement cache data structures to be a bit more sophisticated than |
| 45 | # dicts. 1 seems to be just right. |
| 46 | PREFIX_LEN = 1 |
| 47 | |
| 48 | # Set this to 'threads' to gather coverage data while testing. |
| 49 | POOL_KIND = 'procs' |
| 50 | |
| 51 | |
| 52 | def pathlify(hash_prefix): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 53 | """Converts a binary object hash prefix into a posix path, one folder per |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 54 | byte. |
| 55 | |
| 56 | >>> pathlify('\xDE\xAD') |
| 57 | 'de/ad' |
| 58 | """ |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 59 | return '/'.join('%02x' % b for b in hash_prefix) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 60 | |
| 61 | |
| 62 | @git.memoize_one(threadsafe=False) |
| 63 | def get_number_tree(prefix_bytes): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 64 | """Returns a dictionary of the git-number registry specified by |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 65 | |prefix_bytes|. |
| 66 | |
| 67 | This is in the form of {<full binary ref>: <gen num> ...} |
| 68 | |
| 69 | >>> get_number_tree('\x83\xb4') |
| 70 | {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...} |
| 71 | """ |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 72 | ref = '%s:%s' % (REF, pathlify(prefix_bytes)) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 73 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 74 | try: |
| 75 | raw = git.run('cat-file', 'blob', ref, autostrip=False, decode=False) |
| 76 | return dict( |
| 77 | struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE) |
| 78 | for i in range(len(raw) // CHUNK_SIZE)) |
| 79 | except subprocess2.CalledProcessError: |
| 80 | return {} |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 81 | |
| 82 | |
| 83 | @git.memoize_one(threadsafe=False) |
| 84 | def get_num(commit_hash): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 85 | """Returns the generation number for a commit. |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 86 | |
| 87 | Returns None if the generation number for this commit hasn't been calculated |
| 88 | yet (see load_generation_numbers()). |
| 89 | """ |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 90 | return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 91 | |
| 92 | |
| 93 | def clear_caches(on_disk=False): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 94 | """Clears in-process caches for e.g. unit testing.""" |
| 95 | get_number_tree.clear() |
| 96 | get_num.clear() |
| 97 | if on_disk: |
| 98 | git.run('update-ref', '-d', REF) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 99 | |
| 100 | |
| 101 | def intern_number_tree(tree): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 102 | """Transforms a number tree (in the form returned by |get_number_tree|) into |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 103 | a git blob. |
| 104 | |
| 105 | Returns the git blob id as hex-encoded string. |
| 106 | |
| 107 | >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169} |
| 108 | >>> intern_number_tree(d) |
| 109 | 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce' |
| 110 | """ |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 111 | with tempfile.TemporaryFile() as f: |
| 112 | for k, v in sorted(tree.items()): |
| 113 | f.write(struct.pack(CHUNK_FMT, k, v)) |
| 114 | f.seek(0) |
| 115 | return git.intern_f(f) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 116 | |
| 117 | |
Edward Lemur | 352808f | 2019-10-08 17:51:05 +0000 | [diff] [blame] | 118 | def leaf_map_fn(pre_tree): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 119 | """Converts a prefix and number tree into a git index line.""" |
| 120 | pre, tree = pre_tree |
| 121 | return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre)) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 122 | |
| 123 | |
| 124 | def finalize(targets): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 125 | """Saves all cache data to the git repository. |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 126 | |
| 127 | After calculating the generation number for |targets|, call finalize() to |
| 128 | save all the work to the git repository. |
| 129 | |
| 130 | This in particular saves the trees referred to by DIRTY_TREES. |
| 131 | """ |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 132 | if not DIRTY_TREES: |
| 133 | return |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 134 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 135 | msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values()) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 136 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 137 | idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx') |
| 138 | env = os.environ.copy() |
| 139 | env['GIT_INDEX_FILE'] = str(idx) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 140 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 141 | progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES) |
| 142 | with git.ProgressPrinter(progress_message) as inc: |
| 143 | git.run('read-tree', REF, env=env) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 144 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 145 | prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES)) |
| 146 | updater = subprocess2.Popen( |
| 147 | ['git', 'update-index', '-z', '--index-info'], |
| 148 | stdin=subprocess2.PIPE, |
| 149 | env=env) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 150 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 151 | with git.ScopedPool(kind=POOL_KIND) as leaf_pool: |
| 152 | for item in leaf_pool.imap(leaf_map_fn, prefixes_trees): |
| 153 | updater.stdin.write(item.encode()) |
| 154 | inc() |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 155 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 156 | updater.stdin.close() |
| 157 | updater.wait() |
| 158 | assert updater.returncode == 0 |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 159 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 160 | tree_id = git.run('write-tree', env=env) |
| 161 | commit_cmd = [ |
| 162 | # Git user.name and/or user.email may not be configured, so |
| 163 | # specifying them explicitly. They are not used, but required by |
| 164 | # Git. |
| 165 | '-c', |
| 166 | 'user.name=%s' % AUTHOR_NAME, |
| 167 | '-c', |
| 168 | 'user.email=%s' % AUTHOR_EMAIL, |
| 169 | 'commit-tree', |
| 170 | '-m', |
| 171 | msg, |
| 172 | '-p' |
| 173 | ] + git.hash_multi(REF) |
| 174 | for t in targets: |
| 175 | commit_cmd.extend(['-p', binascii.hexlify(t).decode()]) |
| 176 | commit_cmd.append(tree_id) |
| 177 | commit_hash = git.run(*commit_cmd) |
| 178 | git.run('update-ref', REF, commit_hash) |
| 179 | DIRTY_TREES.clear() |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 180 | |
| 181 | |
| 182 | def preload_tree(prefix): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 183 | """Returns the prefix and parsed tree object for the specified prefix.""" |
| 184 | return prefix, get_number_tree(prefix) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 185 | |
| 186 | |
| 187 | def all_prefixes(depth=PREFIX_LEN): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 188 | prefixes = [bytes([i]) for i in range(255)] |
| 189 | for x in prefixes: |
| 190 | # This isn't covered because PREFIX_LEN currently == 1 |
| 191 | if depth > 1: # pragma: no cover |
| 192 | for r in all_prefixes(depth - 1): |
| 193 | yield x + r |
| 194 | else: |
| 195 | yield x |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 196 | |
| 197 | |
| 198 | def load_generation_numbers(targets): |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 199 | """Populates the caches of get_num and get_number_tree so they contain |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 200 | the results for |targets|. |
| 201 | |
| 202 | Loads cached numbers from disk, and calculates missing numbers if one or |
| 203 | more of |targets| is newer than the cached calculations. |
| 204 | |
| 205 | Args: |
| 206 | targets - An iterable of binary-encoded full git commit hashes. |
| 207 | """ |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 208 | # In case they pass us a generator, listify targets. |
| 209 | targets = list(targets) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 210 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 211 | if all(get_num(t) is not None for t in targets): |
| 212 | return |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 213 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 214 | if git.tree(REF) is None: |
| 215 | empty = git.mktree({}) |
| 216 | commit_hash = git.run( |
| 217 | # Git user.name and/or user.email may not be configured, so |
| 218 | # specifying them explicitly. They are not used, but required by |
| 219 | # Git. |
| 220 | '-c', |
| 221 | 'user.name=%s' % AUTHOR_NAME, |
| 222 | '-c', |
| 223 | 'user.email=%s' % AUTHOR_EMAIL, |
| 224 | 'commit-tree', |
| 225 | '-m', |
| 226 | 'Initial commit from git-number', |
| 227 | empty) |
| 228 | git.run('update-ref', REF, commit_hash) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 229 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 230 | with git.ScopedPool(kind=POOL_KIND) as pool: |
| 231 | preload_iter = pool.imap_unordered(preload_tree, all_prefixes()) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 232 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 233 | rev_list = [] |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 234 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 235 | with git.ProgressPrinter('Loading commits: %(count)d') as inc: |
| 236 | # Curiously, buffering the list into memory seems to be the fastest |
| 237 | # approach in python (as opposed to iterating over the lines in the |
| 238 | # stdout as they're produced). GIL strikes again :/ |
| 239 | cmd = [ |
| 240 | 'rev-list', |
| 241 | '--topo-order', |
| 242 | '--parents', |
| 243 | '--reverse', |
| 244 | '^' + REF, |
| 245 | ] + [binascii.hexlify(target).decode() for target in targets] |
| 246 | for line in git.run(*cmd).splitlines(): |
| 247 | tokens = [binascii.unhexlify(token) for token in line.split()] |
| 248 | rev_list.append((tokens[0], tokens[1:])) |
| 249 | inc() |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 250 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 251 | get_number_tree.update(preload_iter) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 252 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 253 | with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc: |
| 254 | for commit_hash, pars in rev_list: |
| 255 | num = max(map(get_num, pars)) + 1 if pars else 0 |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 256 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 257 | prefix = commit_hash[:PREFIX_LEN] |
| 258 | get_number_tree(prefix)[commit_hash] = num |
| 259 | DIRTY_TREES[prefix] += 1 |
| 260 | get_num.set(commit_hash, num) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 261 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 262 | inc() |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 263 | |
| 264 | |
| 265 | def main(): # pragma: no cover |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 266 | parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__) |
| 267 | parser.add_option('--no-cache', |
| 268 | action='store_true', |
| 269 | help='Do not actually cache anything we calculate.') |
| 270 | parser.add_option('--reset', |
| 271 | action='store_true', |
| 272 | help='Reset the generation number cache and quit.') |
| 273 | parser.add_option('-v', |
| 274 | '--verbose', |
| 275 | action='count', |
| 276 | default=0, |
| 277 | help='Be verbose. Use more times for more verbosity.') |
| 278 | opts, args = parser.parse_args() |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 279 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 280 | levels = [logging.ERROR, logging.INFO, logging.DEBUG] |
| 281 | logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)]) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 282 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 283 | # 'git number' should only be used on bots. |
| 284 | if os.getenv('CHROME_HEADLESS') != '1': |
| 285 | logging.error( |
| 286 | "'git-number' is an infrastructure tool that is only " |
| 287 | "intended to be used internally by bots. Developers should " |
| 288 | "use the 'Cr-Commit-Position' value in the commit's message.") |
| 289 | return 1 |
dnj@chromium.org | e57a6eb | 2014-09-02 20:49:59 +0000 | [diff] [blame] | 290 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 291 | if opts.reset: |
| 292 | clear_caches(on_disk=True) |
| 293 | return |
sbc@chromium.org | 013731e | 2015-02-26 18:28:43 +0000 | [diff] [blame] | 294 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 295 | try: |
| 296 | targets = git.parse_commitrefs(*(args or ['HEAD'])) |
| 297 | except git.BadCommitRefException as e: |
| 298 | parser.error(e) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 299 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 300 | load_generation_numbers(targets) |
| 301 | if not opts.no_cache: |
| 302 | finalize(targets) |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 303 | |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 304 | print('\n'.join(map(str, map(get_num, targets)))) |
| 305 | return 0 |
iannucci@chromium.org | aa74cf6 | 2013-11-19 20:00:49 +0000 | [diff] [blame] | 306 | |
| 307 | |
| 308 | if __name__ == '__main__': # pragma: no cover |
Mike Frysinger | 124bb8e | 2023-09-06 05:48:55 +0000 | [diff] [blame] | 309 | try: |
| 310 | sys.exit(main()) |
| 311 | except KeyboardInterrupt: |
| 312 | sys.stderr.write('interrupted\n') |
| 313 | sys.exit(1) |