blob: d8bca8c61c5c0e684d56145085779008a788ef7a [file] [log] [blame]
Edward Lemur352808f2019-10-08 17:51:05 +00001#!/usr/bin/env vpython3
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Usage: %prog [options] [<commitref>]*
7
8If no <commitref>'s are supplied, it defaults to HEAD.
9
10Calculates the generation number for one or more commits in a git repo.
11
12Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16This number can be used to order commits relative to each other, as long as for
17any pair of the commits, one is an ancestor of the other.
18
19Since calculating the generation number of a commit requires walking that
20commit's entire history, this script caches all calculated data inside the git
21repo that it operates on in the ref 'refs/number/commits'.
22"""
23
Raul Tambre80ee78e2019-05-06 22:41:05 +000024from __future__ import print_function
Edward Lemur352808f2019-10-08 17:51:05 +000025from __future__ import division
Raul Tambre80ee78e2019-05-06 22:41:05 +000026
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000027import binascii
28import collections
29import logging
30import optparse
31import os
32import struct
33import sys
34import tempfile
35
36import git_common as git
37import subprocess2
38
39CHUNK_FMT = '!20sL'
40CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
41DIRTY_TREES = collections.defaultdict(int)
42REF = 'refs/number/commits'
nodir@chromium.orgee740702014-04-03 01:43:32 +000043AUTHOR_NAME = 'git-number'
44AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000045
46# Number of bytes to use for the prefix on our internal number structure.
47# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
48# need to reimplement cache data structures to be a bit more sophisticated than
49# dicts. 1 seems to be just right.
50PREFIX_LEN = 1
51
52# Set this to 'threads' to gather coverage data while testing.
53POOL_KIND = 'procs'
54
55
56def pathlify(hash_prefix):
57 """Converts a binary object hash prefix into a posix path, one folder per
58 byte.
59
60 >>> pathlify('\xDE\xAD')
61 'de/ad'
62 """
Edward Lemur352808f2019-10-08 17:51:05 +000063 if sys.version_info.major == 3:
64 return '/'.join('%02x' % b for b in hash_prefix)
65 else:
66 return '/'.join('%02x' % ord(b) for b in hash_prefix)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000067
68
69@git.memoize_one(threadsafe=False)
70def get_number_tree(prefix_bytes):
71 """Returns a dictionary of the git-number registry specified by
72 |prefix_bytes|.
73
74 This is in the form of {<full binary ref>: <gen num> ...}
75
76 >>> get_number_tree('\x83\xb4')
77 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
78 """
79 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
80
81 try:
Edward Lemur352808f2019-10-08 17:51:05 +000082 raw = git.run('cat-file', 'blob', ref, autostrip=False, decode=False)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000083 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
Edward Lemur352808f2019-10-08 17:51:05 +000084 for i in range(len(raw) // CHUNK_SIZE))
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000085 except subprocess2.CalledProcessError:
86 return {}
87
88
89@git.memoize_one(threadsafe=False)
90def get_num(commit_hash):
91 """Returns the generation number for a commit.
92
93 Returns None if the generation number for this commit hasn't been calculated
94 yet (see load_generation_numbers()).
95 """
96 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
97
98
99def clear_caches(on_disk=False):
100 """Clears in-process caches for e.g. unit testing."""
101 get_number_tree.clear()
102 get_num.clear()
103 if on_disk:
104 git.run('update-ref', '-d', REF)
105
106
107def intern_number_tree(tree):
108 """Transforms a number tree (in the form returned by |get_number_tree|) into
109 a git blob.
110
111 Returns the git blob id as hex-encoded string.
112
113 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
114 >>> intern_number_tree(d)
115 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
116 """
117 with tempfile.TemporaryFile() as f:
Edward Lemur352808f2019-10-08 17:51:05 +0000118 for k, v in sorted(tree.items()):
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000119 f.write(struct.pack(CHUNK_FMT, k, v))
120 f.seek(0)
121 return git.intern_f(f)
122
123
Edward Lemur352808f2019-10-08 17:51:05 +0000124def leaf_map_fn(pre_tree):
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000125 """Converts a prefix and number tree into a git index line."""
Edward Lemur352808f2019-10-08 17:51:05 +0000126 pre, tree = pre_tree
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000127 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
128
129
130def finalize(targets):
131 """Saves all cache data to the git repository.
132
133 After calculating the generation number for |targets|, call finalize() to
134 save all the work to the git repository.
135
136 This in particular saves the trees referred to by DIRTY_TREES.
137 """
138 if not DIRTY_TREES:
139 return
140
Edward Lemur352808f2019-10-08 17:51:05 +0000141 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values())
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000142
143 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
144 env = os.environ.copy()
Josip Sokcevicd1d4ac92020-03-26 18:19:15 +0000145 env['GIT_INDEX_FILE'] = str(idx)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000146
147 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
148 with git.ProgressPrinter(progress_message) as inc:
149 git.run('read-tree', REF, env=env)
150
151 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
152 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
153 stdin=subprocess2.PIPE, env=env)
154
155 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
156 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
Edward Lemur352808f2019-10-08 17:51:05 +0000157 updater.stdin.write(item.encode())
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000158 inc()
159
160 updater.stdin.close()
161 updater.wait()
162 assert updater.returncode == 0
163
164 tree_id = git.run('write-tree', env=env)
nodir@chromium.orgee740702014-04-03 01:43:32 +0000165 commit_cmd = [
166 # Git user.name and/or user.email may not be configured, so specifying
167 # them explicitly. They are not used, but requried by Git.
168 '-c', 'user.name=%s' % AUTHOR_NAME,
169 '-c', 'user.email=%s' % AUTHOR_EMAIL,
170 'commit-tree',
171 '-m', msg,
172 '-p'] + git.hash_multi(REF)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000173 for t in targets:
Edward Lemur352808f2019-10-08 17:51:05 +0000174 commit_cmd.extend(['-p', binascii.hexlify(t).decode()])
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000175 commit_cmd.append(tree_id)
176 commit_hash = git.run(*commit_cmd)
177 git.run('update-ref', REF, commit_hash)
178 DIRTY_TREES.clear()
179
180
181def preload_tree(prefix):
182 """Returns the prefix and parsed tree object for the specified prefix."""
183 return prefix, get_number_tree(prefix)
184
185
186def all_prefixes(depth=PREFIX_LEN):
Edward Lemur352808f2019-10-08 17:51:05 +0000187 if sys.version_info.major == 3:
188 prefixes = [bytes([i]) for i in range(255)]
189 else:
190 prefixes = [chr(i) for i in range(255)]
191 for x in prefixes:
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000192 # This isn't covered because PREFIX_LEN currently == 1
193 if depth > 1: # pragma: no cover
194 for r in all_prefixes(depth - 1):
195 yield x + r
196 else:
197 yield x
198
199
200def load_generation_numbers(targets):
201 """Populates the caches of get_num and get_number_tree so they contain
202 the results for |targets|.
203
204 Loads cached numbers from disk, and calculates missing numbers if one or
205 more of |targets| is newer than the cached calculations.
206
207 Args:
208 targets - An iterable of binary-encoded full git commit hashes.
209 """
210 # In case they pass us a generator, listify targets.
211 targets = list(targets)
212
213 if all(get_num(t) is not None for t in targets):
214 return
215
216 if git.tree(REF) is None:
217 empty = git.mktree({})
John Budorickbcec9e72017-06-01 07:42:07 -0700218 commit_hash = git.run(
219 # Git user.name and/or user.email may not be configured, so specifying
220 # them explicitly. They are not used, but requried by Git.
221 '-c', 'user.name=%s' % AUTHOR_NAME,
222 '-c', 'user.email=%s' % AUTHOR_EMAIL,
223 'commit-tree',
224 '-m', 'Initial commit from git-number',
225 empty)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000226 git.run('update-ref', REF, commit_hash)
227
228 with git.ScopedPool(kind=POOL_KIND) as pool:
229 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
230
231 rev_list = []
232
233 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
234 # Curiously, buffering the list into memory seems to be the fastest
235 # approach in python (as opposed to iterating over the lines in the
236 # stdout as they're produced). GIL strikes again :/
237 cmd = [
238 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
Edward Lemur352808f2019-10-08 17:51:05 +0000239 ] + [binascii.hexlify(target).decode() for target in targets]
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000240 for line in git.run(*cmd).splitlines():
Edward Lemur352808f2019-10-08 17:51:05 +0000241 tokens = [binascii.unhexlify(token) for token in line.split()]
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000242 rev_list.append((tokens[0], tokens[1:]))
243 inc()
244
245 get_number_tree.update(preload_iter)
246
247 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
248 for commit_hash, pars in rev_list:
249 num = max(map(get_num, pars)) + 1 if pars else 0
250
251 prefix = commit_hash[:PREFIX_LEN]
252 get_number_tree(prefix)[commit_hash] = num
253 DIRTY_TREES[prefix] += 1
254 get_num.set(commit_hash, num)
255
256 inc()
257
258
259def main(): # pragma: no cover
260 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
261 parser.add_option('--no-cache', action='store_true',
262 help='Do not actually cache anything we calculate.')
263 parser.add_option('--reset', action='store_true',
264 help='Reset the generation number cache and quit.')
265 parser.add_option('-v', '--verbose', action='count', default=0,
266 help='Be verbose. Use more times for more verbosity.')
267 opts, args = parser.parse_args()
268
269 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
270 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
271
dnj@chromium.orge57a6eb2014-09-02 20:49:59 +0000272 # 'git number' should only be used on bots.
273 if os.getenv('CHROME_HEADLESS') != '1':
274 logging.error("'git-number' is an infrastructure tool that is only "
275 "intended to be used internally by bots. Developers should "
276 "use the 'Cr-Commit-Position' value in the commit's message.")
277 return 1
278
sbc@chromium.org013731e2015-02-26 18:28:43 +0000279 if opts.reset:
280 clear_caches(on_disk=True)
281 return
282
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000283 try:
sbc@chromium.org013731e2015-02-26 18:28:43 +0000284 targets = git.parse_commitrefs(*(args or ['HEAD']))
285 except git.BadCommitRefException as e:
286 parser.error(e)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000287
sbc@chromium.org013731e2015-02-26 18:28:43 +0000288 load_generation_numbers(targets)
289 if not opts.no_cache:
290 finalize(targets)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000291
Raul Tambre80ee78e2019-05-06 22:41:05 +0000292 print('\n'.join(map(str, map(get_num, targets))))
sbc@chromium.org013731e2015-02-26 18:28:43 +0000293 return 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000294
295
296if __name__ == '__main__': # pragma: no cover
sbc@chromium.org013731e2015-02-26 18:28:43 +0000297 try:
298 sys.exit(main())
299 except KeyboardInterrupt:
300 sys.stderr.write('interrupted\n')
301 sys.exit(1)