blob: beae4ac7a63e8c7052028e59ceeb96d67e297756 [file] [log] [blame]
Edward Lemur352808f2019-10-08 17:51:05 +00001#!/usr/bin/env vpython3
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00005"""Usage: %prog [options] [<commitref>]*
6
7If no <commitref>'s are supplied, it defaults to HEAD.
8
9Calculates the generation number for one or more commits in a git repo.
10
11Generation number of a commit C with parents P is defined as:
12 generation_number(C, []) = 0
13 generation_number(C, P) = max(map(generation_number, P)) + 1
14
15This number can be used to order commits relative to each other, as long as for
16any pair of the commits, one is an ancestor of the other.
17
18Since calculating the generation number of a commit requires walking that
19commit's entire history, this script caches all calculated data inside the git
20repo that it operates on in the ref 'refs/number/commits'.
21"""
22
23import binascii
24import collections
25import logging
26import optparse
27import os
28import struct
29import sys
30import tempfile
31
32import git_common as git
33import subprocess2
34
35CHUNK_FMT = '!20sL'
36CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
37DIRTY_TREES = collections.defaultdict(int)
38REF = 'refs/number/commits'
nodir@chromium.orgee740702014-04-03 01:43:32 +000039AUTHOR_NAME = 'git-number'
40AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000041
42# Number of bytes to use for the prefix on our internal number structure.
Quinten Yearsley925cedb2020-04-13 17:49:39 +000043# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000044# need to reimplement cache data structures to be a bit more sophisticated than
45# dicts. 1 seems to be just right.
46PREFIX_LEN = 1
47
48# Set this to 'threads' to gather coverage data while testing.
49POOL_KIND = 'procs'
50
51
52def pathlify(hash_prefix):
Mike Frysinger124bb8e2023-09-06 05:48:55 +000053 """Converts a binary object hash prefix into a posix path, one folder per
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000054 byte.
55
56 >>> pathlify('\xDE\xAD')
57 'de/ad'
58 """
Mike Frysinger124bb8e2023-09-06 05:48:55 +000059 return '/'.join('%02x' % b for b in hash_prefix)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000060
61
62@git.memoize_one(threadsafe=False)
63def get_number_tree(prefix_bytes):
Mike Frysinger124bb8e2023-09-06 05:48:55 +000064 """Returns a dictionary of the git-number registry specified by
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000065 |prefix_bytes|.
66
67 This is in the form of {<full binary ref>: <gen num> ...}
68
69 >>> get_number_tree('\x83\xb4')
70 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
71 """
Mike Frysinger124bb8e2023-09-06 05:48:55 +000072 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000073
Mike Frysinger124bb8e2023-09-06 05:48:55 +000074 try:
75 raw = git.run('cat-file', 'blob', ref, autostrip=False, decode=False)
76 return dict(
77 struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
78 for i in range(len(raw) // CHUNK_SIZE))
79 except subprocess2.CalledProcessError:
80 return {}
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000081
82
83@git.memoize_one(threadsafe=False)
84def get_num(commit_hash):
Mike Frysinger124bb8e2023-09-06 05:48:55 +000085 """Returns the generation number for a commit.
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000086
87 Returns None if the generation number for this commit hasn't been calculated
88 yet (see load_generation_numbers()).
89 """
Mike Frysinger124bb8e2023-09-06 05:48:55 +000090 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000091
92
93def clear_caches(on_disk=False):
Mike Frysinger124bb8e2023-09-06 05:48:55 +000094 """Clears in-process caches for e.g. unit testing."""
95 get_number_tree.clear()
96 get_num.clear()
97 if on_disk:
98 git.run('update-ref', '-d', REF)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000099
100
101def intern_number_tree(tree):
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000102 """Transforms a number tree (in the form returned by |get_number_tree|) into
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000103 a git blob.
104
105 Returns the git blob id as hex-encoded string.
106
107 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
108 >>> intern_number_tree(d)
109 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
110 """
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000111 with tempfile.TemporaryFile() as f:
112 for k, v in sorted(tree.items()):
113 f.write(struct.pack(CHUNK_FMT, k, v))
114 f.seek(0)
115 return git.intern_f(f)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000116
117
Edward Lemur352808f2019-10-08 17:51:05 +0000118def leaf_map_fn(pre_tree):
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000119 """Converts a prefix and number tree into a git index line."""
120 pre, tree = pre_tree
121 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000122
123
124def finalize(targets):
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000125 """Saves all cache data to the git repository.
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000126
127 After calculating the generation number for |targets|, call finalize() to
128 save all the work to the git repository.
129
130 This in particular saves the trees referred to by DIRTY_TREES.
131 """
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000132 if not DIRTY_TREES:
133 return
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000134
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000135 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values())
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000136
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000137 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
138 env = os.environ.copy()
139 env['GIT_INDEX_FILE'] = str(idx)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000140
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000141 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
142 with git.ProgressPrinter(progress_message) as inc:
143 git.run('read-tree', REF, env=env)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000144
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000145 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
146 updater = subprocess2.Popen(
147 ['git', 'update-index', '-z', '--index-info'],
148 stdin=subprocess2.PIPE,
149 env=env)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000150
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000151 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
152 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
153 updater.stdin.write(item.encode())
154 inc()
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000155
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000156 updater.stdin.close()
157 updater.wait()
158 assert updater.returncode == 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000159
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000160 tree_id = git.run('write-tree', env=env)
161 commit_cmd = [
162 # Git user.name and/or user.email may not be configured, so
163 # specifying them explicitly. They are not used, but required by
164 # Git.
165 '-c',
166 'user.name=%s' % AUTHOR_NAME,
167 '-c',
168 'user.email=%s' % AUTHOR_EMAIL,
169 'commit-tree',
170 '-m',
171 msg,
172 '-p'
173 ] + git.hash_multi(REF)
174 for t in targets:
175 commit_cmd.extend(['-p', binascii.hexlify(t).decode()])
176 commit_cmd.append(tree_id)
177 commit_hash = git.run(*commit_cmd)
178 git.run('update-ref', REF, commit_hash)
179 DIRTY_TREES.clear()
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000180
181
182def preload_tree(prefix):
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000183 """Returns the prefix and parsed tree object for the specified prefix."""
184 return prefix, get_number_tree(prefix)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000185
186
187def all_prefixes(depth=PREFIX_LEN):
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000188 prefixes = [bytes([i]) for i in range(255)]
189 for x in prefixes:
190 # This isn't covered because PREFIX_LEN currently == 1
191 if depth > 1: # pragma: no cover
192 for r in all_prefixes(depth - 1):
193 yield x + r
194 else:
195 yield x
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000196
197
198def load_generation_numbers(targets):
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000199 """Populates the caches of get_num and get_number_tree so they contain
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000200 the results for |targets|.
201
202 Loads cached numbers from disk, and calculates missing numbers if one or
203 more of |targets| is newer than the cached calculations.
204
205 Args:
206 targets - An iterable of binary-encoded full git commit hashes.
207 """
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000208 # In case they pass us a generator, listify targets.
209 targets = list(targets)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000210
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000211 if all(get_num(t) is not None for t in targets):
212 return
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000213
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000214 if git.tree(REF) is None:
215 empty = git.mktree({})
216 commit_hash = git.run(
217 # Git user.name and/or user.email may not be configured, so
218 # specifying them explicitly. They are not used, but required by
219 # Git.
220 '-c',
221 'user.name=%s' % AUTHOR_NAME,
222 '-c',
223 'user.email=%s' % AUTHOR_EMAIL,
224 'commit-tree',
225 '-m',
226 'Initial commit from git-number',
227 empty)
228 git.run('update-ref', REF, commit_hash)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000229
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000230 with git.ScopedPool(kind=POOL_KIND) as pool:
231 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000232
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000233 rev_list = []
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000234
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000235 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
236 # Curiously, buffering the list into memory seems to be the fastest
237 # approach in python (as opposed to iterating over the lines in the
238 # stdout as they're produced). GIL strikes again :/
239 cmd = [
240 'rev-list',
241 '--topo-order',
242 '--parents',
243 '--reverse',
244 '^' + REF,
245 ] + [binascii.hexlify(target).decode() for target in targets]
246 for line in git.run(*cmd).splitlines():
247 tokens = [binascii.unhexlify(token) for token in line.split()]
248 rev_list.append((tokens[0], tokens[1:]))
249 inc()
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000250
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000251 get_number_tree.update(preload_iter)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000252
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000253 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
254 for commit_hash, pars in rev_list:
255 num = max(map(get_num, pars)) + 1 if pars else 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000256
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000257 prefix = commit_hash[:PREFIX_LEN]
258 get_number_tree(prefix)[commit_hash] = num
259 DIRTY_TREES[prefix] += 1
260 get_num.set(commit_hash, num)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000261
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000262 inc()
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000263
264
265def main(): # pragma: no cover
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000266 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
267 parser.add_option('--no-cache',
268 action='store_true',
269 help='Do not actually cache anything we calculate.')
270 parser.add_option('--reset',
271 action='store_true',
272 help='Reset the generation number cache and quit.')
273 parser.add_option('-v',
274 '--verbose',
275 action='count',
276 default=0,
277 help='Be verbose. Use more times for more verbosity.')
278 opts, args = parser.parse_args()
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000279
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000280 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
281 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000282
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000283 # 'git number' should only be used on bots.
284 if os.getenv('CHROME_HEADLESS') != '1':
285 logging.error(
286 "'git-number' is an infrastructure tool that is only "
287 "intended to be used internally by bots. Developers should "
288 "use the 'Cr-Commit-Position' value in the commit's message.")
289 return 1
dnj@chromium.orge57a6eb2014-09-02 20:49:59 +0000290
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000291 if opts.reset:
292 clear_caches(on_disk=True)
293 return
sbc@chromium.org013731e2015-02-26 18:28:43 +0000294
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000295 try:
296 targets = git.parse_commitrefs(*(args or ['HEAD']))
297 except git.BadCommitRefException as e:
298 parser.error(e)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000299
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000300 load_generation_numbers(targets)
301 if not opts.no_cache:
302 finalize(targets)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000303
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000304 print('\n'.join(map(str, map(get_num, targets))))
305 return 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000306
307
308if __name__ == '__main__': # pragma: no cover
Mike Frysinger124bb8e2023-09-06 05:48:55 +0000309 try:
310 sys.exit(main())
311 except KeyboardInterrupt:
312 sys.stderr.write('interrupted\n')
313 sys.exit(1)