blob: 864f3ee8ccebdd9626b68026d6f37c75f9701a9c [file] [log] [blame]
Edward Lemur352808f2019-10-08 17:51:05 +00001#!/usr/bin/env vpython3
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Usage: %prog [options] [<commitref>]*
7
8If no <commitref>'s are supplied, it defaults to HEAD.
9
10Calculates the generation number for one or more commits in a git repo.
11
12Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16This number can be used to order commits relative to each other, as long as for
17any pair of the commits, one is an ancestor of the other.
18
19Since calculating the generation number of a commit requires walking that
20commit's entire history, this script caches all calculated data inside the git
21repo that it operates on in the ref 'refs/number/commits'.
22"""
23
24import binascii
25import collections
26import logging
27import optparse
28import os
29import struct
30import sys
31import tempfile
32
33import git_common as git
34import subprocess2
35
36CHUNK_FMT = '!20sL'
37CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38DIRTY_TREES = collections.defaultdict(int)
39REF = 'refs/number/commits'
nodir@chromium.orgee740702014-04-03 01:43:32 +000040AUTHOR_NAME = 'git-number'
41AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000042
43# Number of bytes to use for the prefix on our internal number structure.
Quinten Yearsley925cedb2020-04-13 17:49:39 +000044# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000045# need to reimplement cache data structures to be a bit more sophisticated than
46# dicts. 1 seems to be just right.
47PREFIX_LEN = 1
48
49# Set this to 'threads' to gather coverage data while testing.
50POOL_KIND = 'procs'
51
52
53def pathlify(hash_prefix):
54 """Converts a binary object hash prefix into a posix path, one folder per
55 byte.
56
57 >>> pathlify('\xDE\xAD')
58 'de/ad'
59 """
Gavin Makcc976552023-08-28 17:01:52 +000060 return '/'.join('%02x' % b for b in hash_prefix)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000061
62
63@git.memoize_one(threadsafe=False)
64def get_number_tree(prefix_bytes):
65 """Returns a dictionary of the git-number registry specified by
66 |prefix_bytes|.
67
68 This is in the form of {<full binary ref>: <gen num> ...}
69
70 >>> get_number_tree('\x83\xb4')
71 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
72 """
73 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
74
75 try:
Edward Lemur352808f2019-10-08 17:51:05 +000076 raw = git.run('cat-file', 'blob', ref, autostrip=False, decode=False)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000077 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
Edward Lemur352808f2019-10-08 17:51:05 +000078 for i in range(len(raw) // CHUNK_SIZE))
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000079 except subprocess2.CalledProcessError:
80 return {}
81
82
83@git.memoize_one(threadsafe=False)
84def get_num(commit_hash):
85 """Returns the generation number for a commit.
86
87 Returns None if the generation number for this commit hasn't been calculated
88 yet (see load_generation_numbers()).
89 """
90 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
91
92
93def clear_caches(on_disk=False):
94 """Clears in-process caches for e.g. unit testing."""
95 get_number_tree.clear()
96 get_num.clear()
97 if on_disk:
98 git.run('update-ref', '-d', REF)
99
100
101def intern_number_tree(tree):
102 """Transforms a number tree (in the form returned by |get_number_tree|) into
103 a git blob.
104
105 Returns the git blob id as hex-encoded string.
106
107 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
108 >>> intern_number_tree(d)
109 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
110 """
111 with tempfile.TemporaryFile() as f:
Edward Lemur352808f2019-10-08 17:51:05 +0000112 for k, v in sorted(tree.items()):
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000113 f.write(struct.pack(CHUNK_FMT, k, v))
114 f.seek(0)
115 return git.intern_f(f)
116
117
Edward Lemur352808f2019-10-08 17:51:05 +0000118def leaf_map_fn(pre_tree):
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000119 """Converts a prefix and number tree into a git index line."""
Edward Lemur352808f2019-10-08 17:51:05 +0000120 pre, tree = pre_tree
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000121 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
122
123
124def finalize(targets):
125 """Saves all cache data to the git repository.
126
127 After calculating the generation number for |targets|, call finalize() to
128 save all the work to the git repository.
129
130 This in particular saves the trees referred to by DIRTY_TREES.
131 """
132 if not DIRTY_TREES:
133 return
134
Edward Lemur352808f2019-10-08 17:51:05 +0000135 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values())
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000136
137 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
138 env = os.environ.copy()
Josip Sokcevicd1d4ac92020-03-26 18:19:15 +0000139 env['GIT_INDEX_FILE'] = str(idx)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000140
141 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
142 with git.ProgressPrinter(progress_message) as inc:
143 git.run('read-tree', REF, env=env)
144
145 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
146 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
147 stdin=subprocess2.PIPE, env=env)
148
149 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
150 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
Edward Lemur352808f2019-10-08 17:51:05 +0000151 updater.stdin.write(item.encode())
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000152 inc()
153
154 updater.stdin.close()
155 updater.wait()
156 assert updater.returncode == 0
157
158 tree_id = git.run('write-tree', env=env)
nodir@chromium.orgee740702014-04-03 01:43:32 +0000159 commit_cmd = [
160 # Git user.name and/or user.email may not be configured, so specifying
Quinten Yearsley925cedb2020-04-13 17:49:39 +0000161 # them explicitly. They are not used, but required by Git.
nodir@chromium.orgee740702014-04-03 01:43:32 +0000162 '-c', 'user.name=%s' % AUTHOR_NAME,
163 '-c', 'user.email=%s' % AUTHOR_EMAIL,
164 'commit-tree',
165 '-m', msg,
166 '-p'] + git.hash_multi(REF)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000167 for t in targets:
Edward Lemur352808f2019-10-08 17:51:05 +0000168 commit_cmd.extend(['-p', binascii.hexlify(t).decode()])
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000169 commit_cmd.append(tree_id)
170 commit_hash = git.run(*commit_cmd)
171 git.run('update-ref', REF, commit_hash)
172 DIRTY_TREES.clear()
173
174
175def preload_tree(prefix):
176 """Returns the prefix and parsed tree object for the specified prefix."""
177 return prefix, get_number_tree(prefix)
178
179
180def all_prefixes(depth=PREFIX_LEN):
Gavin Makcc976552023-08-28 17:01:52 +0000181 prefixes = [bytes([i]) for i in range(255)]
Edward Lemur352808f2019-10-08 17:51:05 +0000182 for x in prefixes:
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000183 # This isn't covered because PREFIX_LEN currently == 1
184 if depth > 1: # pragma: no cover
185 for r in all_prefixes(depth - 1):
186 yield x + r
187 else:
188 yield x
189
190
191def load_generation_numbers(targets):
192 """Populates the caches of get_num and get_number_tree so they contain
193 the results for |targets|.
194
195 Loads cached numbers from disk, and calculates missing numbers if one or
196 more of |targets| is newer than the cached calculations.
197
198 Args:
199 targets - An iterable of binary-encoded full git commit hashes.
200 """
201 # In case they pass us a generator, listify targets.
202 targets = list(targets)
203
204 if all(get_num(t) is not None for t in targets):
205 return
206
207 if git.tree(REF) is None:
208 empty = git.mktree({})
John Budorickbcec9e72017-06-01 07:42:07 -0700209 commit_hash = git.run(
210 # Git user.name and/or user.email may not be configured, so specifying
Quinten Yearsley925cedb2020-04-13 17:49:39 +0000211 # them explicitly. They are not used, but required by Git.
John Budorickbcec9e72017-06-01 07:42:07 -0700212 '-c', 'user.name=%s' % AUTHOR_NAME,
213 '-c', 'user.email=%s' % AUTHOR_EMAIL,
214 'commit-tree',
215 '-m', 'Initial commit from git-number',
216 empty)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000217 git.run('update-ref', REF, commit_hash)
218
219 with git.ScopedPool(kind=POOL_KIND) as pool:
220 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
221
222 rev_list = []
223
224 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
225 # Curiously, buffering the list into memory seems to be the fastest
226 # approach in python (as opposed to iterating over the lines in the
227 # stdout as they're produced). GIL strikes again :/
228 cmd = [
229 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
Edward Lemur352808f2019-10-08 17:51:05 +0000230 ] + [binascii.hexlify(target).decode() for target in targets]
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000231 for line in git.run(*cmd).splitlines():
Edward Lemur352808f2019-10-08 17:51:05 +0000232 tokens = [binascii.unhexlify(token) for token in line.split()]
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000233 rev_list.append((tokens[0], tokens[1:]))
234 inc()
235
236 get_number_tree.update(preload_iter)
237
238 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
239 for commit_hash, pars in rev_list:
240 num = max(map(get_num, pars)) + 1 if pars else 0
241
242 prefix = commit_hash[:PREFIX_LEN]
243 get_number_tree(prefix)[commit_hash] = num
244 DIRTY_TREES[prefix] += 1
245 get_num.set(commit_hash, num)
246
247 inc()
248
249
250def main(): # pragma: no cover
251 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
252 parser.add_option('--no-cache', action='store_true',
253 help='Do not actually cache anything we calculate.')
254 parser.add_option('--reset', action='store_true',
255 help='Reset the generation number cache and quit.')
256 parser.add_option('-v', '--verbose', action='count', default=0,
257 help='Be verbose. Use more times for more verbosity.')
258 opts, args = parser.parse_args()
259
260 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
261 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
262
dnj@chromium.orge57a6eb2014-09-02 20:49:59 +0000263 # 'git number' should only be used on bots.
264 if os.getenv('CHROME_HEADLESS') != '1':
265 logging.error("'git-number' is an infrastructure tool that is only "
266 "intended to be used internally by bots. Developers should "
267 "use the 'Cr-Commit-Position' value in the commit's message.")
268 return 1
269
sbc@chromium.org013731e2015-02-26 18:28:43 +0000270 if opts.reset:
271 clear_caches(on_disk=True)
272 return
273
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000274 try:
sbc@chromium.org013731e2015-02-26 18:28:43 +0000275 targets = git.parse_commitrefs(*(args or ['HEAD']))
276 except git.BadCommitRefException as e:
277 parser.error(e)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000278
sbc@chromium.org013731e2015-02-26 18:28:43 +0000279 load_generation_numbers(targets)
280 if not opts.no_cache:
281 finalize(targets)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000282
Raul Tambre80ee78e2019-05-06 22:41:05 +0000283 print('\n'.join(map(str, map(get_num, targets))))
sbc@chromium.org013731e2015-02-26 18:28:43 +0000284 return 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000285
286
287if __name__ == '__main__': # pragma: no cover
sbc@chromium.org013731e2015-02-26 18:28:43 +0000288 try:
289 sys.exit(main())
290 except KeyboardInterrupt:
291 sys.stderr.write('interrupted\n')
292 sys.exit(1)