blob: 31d3653628b1539ddd2d0595f8fce8f3bc32346d [file] [log] [blame]
Edward Lemurd6186f92019-08-12 17:56:58 +00001#!/usr/bin/env vpython
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Usage: %prog [options] [<commitref>]*
7
8If no <commitref>'s are supplied, it defaults to HEAD.
9
10Calculates the generation number for one or more commits in a git repo.
11
12Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16This number can be used to order commits relative to each other, as long as for
17any pair of the commits, one is an ancestor of the other.
18
19Since calculating the generation number of a commit requires walking that
20commit's entire history, this script caches all calculated data inside the git
21repo that it operates on in the ref 'refs/number/commits'.
22"""
23
Raul Tambre80ee78e2019-05-06 22:41:05 +000024from __future__ import print_function
25
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000026import binascii
27import collections
28import logging
29import optparse
30import os
31import struct
32import sys
33import tempfile
34
35import git_common as git
36import subprocess2
37
# Layout of one cache record as understood by the struct module: '!' selects
# network byte order with standard sizes, '20s' is a 20-byte raw commit hash
# and 'L' is an unsigned 32-bit generation number.
CHUNK_FMT = '!20sL'
# Size in bytes of one packed record.
CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
# Maps a hash prefix to the count of numbers added under that prefix by
# load_generation_numbers(); emptied again by finalize().
DIRTY_TREES = collections.defaultdict(int)
# Git ref under which the generation number cache is stored.
REF = 'refs/number/commits'
# Identity handed to git (user.name / user.email) when creating cache commits.
AUTHOR_NAME = 'git-number'
AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'

# Number of bytes to use for the prefix on our internal number structure.
# 0 is slow to deserialize. 2 creates way too much bookkeeping overhead (would
# need to reimplement cache data structures to be a bit more sophisticated than
# dicts). 1 seems to be just right.
PREFIX_LEN = 1

# Kind of worker pool passed to git.ScopedPool.
# Set this to 'threads' to gather coverage data while testing.
POOL_KIND = 'procs'
53
54
def pathlify(hash_prefix):
  """Converts a binary object hash prefix into a posix path, one folder per
  byte.

  Works for inputs whose iteration yields 1-character strings (Python 2 str,
  Python 3 str) as well as inputs whose iteration yields ints (Python 3 bytes,
  bytearray).

  Args:
    hash_prefix - The binary hash prefix to convert. May be empty.

  Returns the path as a string of two-digit lowercase hex components joined
  by '/'; an empty prefix yields an empty string.

  >>> pathlify('\xDE\xAD')
  'de/ad'
  """
  # Iterating over bytes on Python 3 yields ints, and ord() raises TypeError
  # for those, so only call ord() on actual 1-character strings.
  return '/'.join(
      '%02x' % (b if isinstance(b, int) else ord(b)) for b in hash_prefix)
63
64
@git.memoize_one(threadsafe=False)
def get_number_tree(prefix_bytes):
  """Returns a dictionary of the git-number registry specified by
  |prefix_bytes|.

  This is in the form of {<full binary ref>: <gen num> ...}

  The registry is read from the blob at '<REF>:<pathlify(prefix_bytes)>'. If
  git cannot produce that blob (the command fails), an empty dictionary is
  returned instead.

  >>> get_number_tree('\x83\xb4')
  {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
  """
  ref = '%s:%s' % (REF, pathlify(prefix_bytes))

  # Only the git invocation can raise CalledProcessError, so keep the parsing
  # outside of the try block.
  try:
    # autostrip=False: the blob is binary, so no byte of it may be dropped.
    # NOTE(review): assumes git.run returns the blob as a raw byte string --
    # confirm before relying on this under Python 3.
    raw = git.run('cat-file', 'blob', ref, autostrip=False)
  except subprocess2.CalledProcessError:
    return {}

  # The blob is a sequence of fixed-size CHUNK_FMT records. Floor division
  # keeps the record count an int on both Python 2 and 3; bytes after the last
  # complete record are ignored.
  return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
              for i in range(len(raw) // CHUNK_SIZE))
83
84
@git.memoize_one(threadsafe=False)
def get_num(commit_hash):
  """Looks up the generation number recorded for |commit_hash|.

  The lookup goes through the number tree selected by the first PREFIX_LEN
  bytes of the hash.

  Returns None when no generation number has been calculated for this commit
  yet (see load_generation_numbers()).
  """
  prefix = commit_hash[:PREFIX_LEN]
  number_tree = get_number_tree(prefix)
  return number_tree.get(commit_hash)
93
94
def clear_caches(on_disk=False):
  """Drops the in-process memoization caches, e.g. for unit testing.

  Args:
    on_disk - If true, additionally delete the REF ref from the git repo.
  """
  for memoized in (get_number_tree, get_num):
    memoized.clear()

  if not on_disk:
    return
  git.run('update-ref', '-d', REF)
101
102
def intern_number_tree(tree):
  """Transforms a number tree (in the form returned by |get_number_tree|) into
  a git blob.

  The entries are written in sorted order, each packed as one CHUNK_FMT
  record, into a temporary file that is then handed to git.intern_f().

  Returns the git blob id as hex-encoded string.

  >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
  >>> intern_number_tree(d)
  'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
  """
  with tempfile.TemporaryFile() as f:
    # items() instead of the Python-2-only iteritems(); sorted() materializes
    # the pairs either way, so the result is identical.
    for commit_hash, num in sorted(tree.items()):
      f.write(struct.pack(CHUNK_FMT, commit_hash, num))
    # Rewind so the file is read from the beginning.
    f.seek(0)
    return git.intern_f(f)
118
119
def leaf_map_fn(pre_tree):
  """Converts a prefix and number tree into a git index line.

  Args:
    pre_tree - A (prefix, tree) pair: the binary hash prefix and the number
        tree (as returned by |get_number_tree|) stored under that prefix.

  Returns a NUL-terminated '100644 blob <blob id>\\t<path>' entry, the form
  fed to 'git update-index -z --index-info' by finalize().
  """
  # Tuple parameters in the signature were removed in Python 3 (PEP 3113), so
  # unpack the single positional argument here instead.
  pre, tree = pre_tree
  return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
123
124
def finalize(targets):
  """Saves all cache data to the git repository.

  After calculating the generation number for |targets|, call finalize() to
  save all the work to the git repository.

  This in particular saves the trees referred to by DIRTY_TREES. If
  DIRTY_TREES is empty, nothing is done.

  Args:
    targets - Binary-encoded full git commit hashes; each one is recorded as
        an additional parent of the new cache commit.

  Raises:
    AssertionError if 'git update-index' exits with a non-zero status.
  """
  if not DIRTY_TREES:
    return

  # values() instead of the Python-2-only itervalues().
  msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.values())

  # Stage the cache tree in a dedicated index file (via GIT_INDEX_FILE) rather
  # than the repository's default index.
  idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
  env = os.environ.copy()
  env['GIT_INDEX_FILE'] = idx

  progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
  with git.ProgressPrinter(progress_message) as inc:
    git.run('read-tree', REF, env=env)

    prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
    updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
                                stdin=subprocess2.PIPE, env=env)

    try:
      with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
        for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
          updater.stdin.write(item)
          inc()
    finally:
      # Always close the pipe and reap the child, even if interning a tree
      # failed part-way through, so no 'git update-index' process is left
      # behind waiting on stdin.
      updater.stdin.close()
      updater.wait()

    if updater.returncode != 0:
      # Raised explicitly instead of via `assert` so the check is not stripped
      # under `python -O`; the exception type is kept as AssertionError.
      raise AssertionError(
          'git update-index exited with status %d' % updater.returncode)

    tree_id = git.run('write-tree', env=env)
    commit_cmd = [
        # Git user.name and/or user.email may not be configured, so specifying
        # them explicitly. They are not used, but required by Git.
        '-c', 'user.name=%s' % AUTHOR_NAME,
        '-c', 'user.email=%s' % AUTHOR_EMAIL,
        'commit-tree',
        '-m', msg,
        '-p'] + git.hash_multi(REF)
    for t in targets:
      commit_cmd.extend(['-p', binascii.hexlify(t)])
    commit_cmd.append(tree_id)
    commit_hash = git.run(*commit_cmd)
    git.run('update-ref', REF, commit_hash)
  DIRTY_TREES.clear()
174
175
def preload_tree(prefix):
  """Loads the number tree for |prefix|.

  Returns a (prefix, tree) pair, where tree is the result of
  get_number_tree(prefix).
  """
  tree = get_number_tree(prefix)
  return prefix, tree
179
180
def all_prefixes(depth=PREFIX_LEN):
  """Yields every possible binary hash prefix that is |depth| bytes long.

  Prefixes are generated in ascending order, covering all 256 values (0x00
  through 0xff) for each byte.

  Args:
    depth - Length in bytes of the prefixes to generate.
  """
  # A byte has 256 possible values; iterating over only 255 of them would
  # leave out every prefix containing 0xff.
  for value in range(256):
    # Packing as one unsigned byte gives a 1-byte string on both Python 2 and
    # Python 3 (on Python 2 this equals chr(value)).
    x = struct.pack('B', value)
    # This isn't covered because PREFIX_LEN currently == 1
    if depth > 1:  # pragma: no cover
      for r in all_prefixes(depth - 1):
        yield x + r
    else:
      yield x
189
190
def load_generation_numbers(targets):
  """Populates the caches of get_num and get_number_tree so they contain
  the results for |targets|.

  Loads cached numbers from disk, and calculates missing numbers if one or
  more of |targets| is newer than the cached calculations.

  Newly calculated numbers are stored in the in-process caches and counted in
  DIRTY_TREES; nothing is written to the repository's cache by this function
  beyond creating REF when it is missing (see finalize()).

  Args:
    targets - An iterable of binary-encoded full git commit hashes.
  """
  # In case they pass us a generator, listify targets.
  targets = list(targets)

  if all(get_num(t) is not None for t in targets):
    return

  if git.tree(REF) is None:
    # No tree could be resolved for REF: seed the ref with a commit of an
    # empty tree so that REF can be named in the rev-list call below.
    empty = git.mktree({})
    commit_hash = git.run(
        # Git user.name and/or user.email may not be configured, so specifying
        # them explicitly. They are not used, but required by Git.
        '-c', 'user.name=%s' % AUTHOR_NAME,
        '-c', 'user.email=%s' % AUTHOR_EMAIL,
        'commit-tree',
        '-m', 'Initial commit from git-number',
        empty)
    git.run('update-ref', REF, commit_hash)

  with git.ScopedPool(kind=POOL_KIND) as pool:
    # Start loading every number tree in the pool while rev-list runs.
    preload_iter = pool.imap_unordered(preload_tree, all_prefixes())

    rev_list = []

    with git.ProgressPrinter('Loading commits: %(count)d') as inc:
      # Curiously, buffering the list into memory seems to be the fastest
      # approach in python (as opposed to iterating over the lines in the
      # stdout as they're produced). GIL strikes again :/
      #
      # '^REF' leaves out everything reachable from the cache ref; finalize()
      # records its targets as parents of that ref's commit.
      # List comprehensions are used instead of map() because on Python 3
      # map() returns an iterator, which can neither be appended to a list
      # with '+' nor be indexed.
      cmd = [
          'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
      ] + [binascii.hexlify(t) for t in targets]
      for line in git.run(*cmd).splitlines():
        # With --parents each line is '<commit> <parent> <parent> ...'.
        tokens = [binascii.unhexlify(token) for token in line.split()]
        rev_list.append((tokens[0], tokens[1:]))
        inc()

    # Feed the preloaded (prefix, tree) pairs into get_number_tree's cache.
    # This has to happen while the pool is still alive.
    get_number_tree.update(preload_iter)

  with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
    for commit_hash, pars in rev_list:
      # A commit without parents has generation number 0; otherwise it is one
      # more than the largest generation number among its parents.
      if pars:
        num = max(get_num(p) for p in pars) + 1
      else:
        num = 0

      prefix = commit_hash[:PREFIX_LEN]
      get_number_tree(prefix)[commit_hash] = num
      DIRTY_TREES[prefix] += 1
      get_num.set(commit_hash, num)

      inc()
248
249
def main():  # pragma: no cover
  """Command line entry point.

  Parses the options, refuses to run unless CHROME_HEADLESS is '1', and then
  either resets the on-disk cache (--reset) or prints one generation number
  per requested commitref.

  Returns 1 when refusing to run, None after --reset, and 0 otherwise.
  """
  parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
  parser.add_option(
      '--no-cache',
      action='store_true',
      help='Do not actually cache anything we calculate.')
  parser.add_option(
      '--reset',
      action='store_true',
      help='Reset the generation number cache and quit.')
  parser.add_option(
      '-v', '--verbose',
      action='count',
      default=0,
      help='Be verbose. Use more times for more verbosity.')
  opts, args = parser.parse_args()

  # Each -v moves one step down this table; extra -v's stay at DEBUG.
  log_levels = (logging.ERROR, logging.INFO, logging.DEBUG)
  level_index = min(opts.verbose, len(log_levels) - 1)
  logging.basicConfig(level=log_levels[level_index])

  # 'git number' should only be used on bots.
  if os.environ.get('CHROME_HEADLESS') != '1':
    logging.error("'git-number' is an infrastructure tool that is only "
                  "intended to be used internally by bots. Developers should "
                  "use the 'Cr-Commit-Position' value in the commit's message.")
    return 1

  if opts.reset:
    clear_caches(on_disk=True)
    return

  # With no commitrefs on the command line, operate on HEAD.
  commitrefs = args if args else ['HEAD']
  try:
    targets = git.parse_commitrefs(*commitrefs)
  except git.BadCommitRefException as e:
    parser.error(e)

  load_generation_numbers(targets)
  if not opts.no_cache:
    finalize(targets)

  print('\n'.join(str(get_num(target)) for target in targets))
  return 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000285
286
if __name__ == '__main__':  # pragma: no cover
  # Run main() and turn Ctrl-C into a short message plus exit status 1.
  try:
    exit_status = main()
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_status = 1
  sys.exit(exit_status)