blob: 0e3ac16dd1067659acf96429db75a6c058adf17e [file] [log] [blame]
Edward Lesmes98eda3f2019-08-12 21:09:53 +00001#!/usr/bin/env python
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00002# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Usage: %prog [options] [<commitref>]*
7
8If no <commitref>'s are supplied, it defaults to HEAD.
9
10Calculates the generation number for one or more commits in a git repo.
11
12Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16This number can be used to order commits relative to each other, as long as for
17any pair of the commits, one is an ancestor of the other.
18
19Since calculating the generation number of a commit requires walking that
20commit's entire history, this script caches all calculated data inside the git
21repo that it operates on in the ref 'refs/number/commits'.
22"""
23
Raul Tambre80ee78e2019-05-06 22:41:05 +000024from __future__ import print_function
25
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000026import binascii
27import collections
28import logging
29import optparse
30import os
31import struct
32import sys
33import tempfile
34
35import git_common as git
36import subprocess2
37
38CHUNK_FMT = '!20sL'
39CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
40DIRTY_TREES = collections.defaultdict(int)
41REF = 'refs/number/commits'
nodir@chromium.orgee740702014-04-03 01:43:32 +000042AUTHOR_NAME = 'git-number'
43AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000044
45# Number of bytes to use for the prefix on our internal number structure.
46# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
47# need to reimplement cache data structures to be a bit more sophisticated than
48# dicts. 1 seems to be just right.
49PREFIX_LEN = 1
50
51# Set this to 'threads' to gather coverage data while testing.
52POOL_KIND = 'procs'
53
54
55def pathlify(hash_prefix):
56 """Converts a binary object hash prefix into a posix path, one folder per
57 byte.
58
59 >>> pathlify('\xDE\xAD')
60 'de/ad'
61 """
62 return '/'.join('%02x' % ord(b) for b in hash_prefix)
63
64
65@git.memoize_one(threadsafe=False)
66def get_number_tree(prefix_bytes):
67 """Returns a dictionary of the git-number registry specified by
68 |prefix_bytes|.
69
70 This is in the form of {<full binary ref>: <gen num> ...}
71
72 >>> get_number_tree('\x83\xb4')
73 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
74 """
75 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
76
77 try:
Edward Lemur12a537f2019-10-03 21:57:15 +000078 raw = buffer(
79 git.run('cat-file', 'blob', ref, autostrip=False, decode=False))
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000080 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
81 for i in xrange(len(raw) / CHUNK_SIZE))
82 except subprocess2.CalledProcessError:
83 return {}
84
85
86@git.memoize_one(threadsafe=False)
87def get_num(commit_hash):
88 """Returns the generation number for a commit.
89
90 Returns None if the generation number for this commit hasn't been calculated
91 yet (see load_generation_numbers()).
92 """
93 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
94
95
96def clear_caches(on_disk=False):
97 """Clears in-process caches for e.g. unit testing."""
98 get_number_tree.clear()
99 get_num.clear()
100 if on_disk:
101 git.run('update-ref', '-d', REF)
102
103
104def intern_number_tree(tree):
105 """Transforms a number tree (in the form returned by |get_number_tree|) into
106 a git blob.
107
108 Returns the git blob id as hex-encoded string.
109
110 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
111 >>> intern_number_tree(d)
112 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
113 """
114 with tempfile.TemporaryFile() as f:
115 for k, v in sorted(tree.iteritems()):
116 f.write(struct.pack(CHUNK_FMT, k, v))
117 f.seek(0)
118 return git.intern_f(f)
119
120
121def leaf_map_fn((pre, tree)):
122 """Converts a prefix and number tree into a git index line."""
123 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
124
125
126def finalize(targets):
127 """Saves all cache data to the git repository.
128
129 After calculating the generation number for |targets|, call finalize() to
130 save all the work to the git repository.
131
132 This in particular saves the trees referred to by DIRTY_TREES.
133 """
134 if not DIRTY_TREES:
135 return
136
137 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
138
139 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
140 env = os.environ.copy()
141 env['GIT_INDEX_FILE'] = idx
142
143 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
144 with git.ProgressPrinter(progress_message) as inc:
145 git.run('read-tree', REF, env=env)
146
147 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
148 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
149 stdin=subprocess2.PIPE, env=env)
150
151 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
152 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
153 updater.stdin.write(item)
154 inc()
155
156 updater.stdin.close()
157 updater.wait()
158 assert updater.returncode == 0
159
160 tree_id = git.run('write-tree', env=env)
nodir@chromium.orgee740702014-04-03 01:43:32 +0000161 commit_cmd = [
162 # Git user.name and/or user.email may not be configured, so specifying
163 # them explicitly. They are not used, but requried by Git.
164 '-c', 'user.name=%s' % AUTHOR_NAME,
165 '-c', 'user.email=%s' % AUTHOR_EMAIL,
166 'commit-tree',
167 '-m', msg,
168 '-p'] + git.hash_multi(REF)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000169 for t in targets:
170 commit_cmd.extend(['-p', binascii.hexlify(t)])
171 commit_cmd.append(tree_id)
172 commit_hash = git.run(*commit_cmd)
173 git.run('update-ref', REF, commit_hash)
174 DIRTY_TREES.clear()
175
176
177def preload_tree(prefix):
178 """Returns the prefix and parsed tree object for the specified prefix."""
179 return prefix, get_number_tree(prefix)
180
181
182def all_prefixes(depth=PREFIX_LEN):
183 for x in (chr(i) for i in xrange(255)):
184 # This isn't covered because PREFIX_LEN currently == 1
185 if depth > 1: # pragma: no cover
186 for r in all_prefixes(depth - 1):
187 yield x + r
188 else:
189 yield x
190
191
192def load_generation_numbers(targets):
193 """Populates the caches of get_num and get_number_tree so they contain
194 the results for |targets|.
195
196 Loads cached numbers from disk, and calculates missing numbers if one or
197 more of |targets| is newer than the cached calculations.
198
199 Args:
200 targets - An iterable of binary-encoded full git commit hashes.
201 """
202 # In case they pass us a generator, listify targets.
203 targets = list(targets)
204
205 if all(get_num(t) is not None for t in targets):
206 return
207
208 if git.tree(REF) is None:
209 empty = git.mktree({})
John Budorickbcec9e72017-06-01 07:42:07 -0700210 commit_hash = git.run(
211 # Git user.name and/or user.email may not be configured, so specifying
212 # them explicitly. They are not used, but requried by Git.
213 '-c', 'user.name=%s' % AUTHOR_NAME,
214 '-c', 'user.email=%s' % AUTHOR_EMAIL,
215 'commit-tree',
216 '-m', 'Initial commit from git-number',
217 empty)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000218 git.run('update-ref', REF, commit_hash)
219
220 with git.ScopedPool(kind=POOL_KIND) as pool:
221 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
222
223 rev_list = []
224
225 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
226 # Curiously, buffering the list into memory seems to be the fastest
227 # approach in python (as opposed to iterating over the lines in the
228 # stdout as they're produced). GIL strikes again :/
229 cmd = [
230 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
231 ] + map(binascii.hexlify, targets)
232 for line in git.run(*cmd).splitlines():
233 tokens = map(binascii.unhexlify, line.split())
234 rev_list.append((tokens[0], tokens[1:]))
235 inc()
236
237 get_number_tree.update(preload_iter)
238
239 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
240 for commit_hash, pars in rev_list:
241 num = max(map(get_num, pars)) + 1 if pars else 0
242
243 prefix = commit_hash[:PREFIX_LEN]
244 get_number_tree(prefix)[commit_hash] = num
245 DIRTY_TREES[prefix] += 1
246 get_num.set(commit_hash, num)
247
248 inc()
249
250
251def main(): # pragma: no cover
252 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
253 parser.add_option('--no-cache', action='store_true',
254 help='Do not actually cache anything we calculate.')
255 parser.add_option('--reset', action='store_true',
256 help='Reset the generation number cache and quit.')
257 parser.add_option('-v', '--verbose', action='count', default=0,
258 help='Be verbose. Use more times for more verbosity.')
259 opts, args = parser.parse_args()
260
261 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
262 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
263
dnj@chromium.orge57a6eb2014-09-02 20:49:59 +0000264 # 'git number' should only be used on bots.
265 if os.getenv('CHROME_HEADLESS') != '1':
266 logging.error("'git-number' is an infrastructure tool that is only "
267 "intended to be used internally by bots. Developers should "
268 "use the 'Cr-Commit-Position' value in the commit's message.")
269 return 1
270
sbc@chromium.org013731e2015-02-26 18:28:43 +0000271 if opts.reset:
272 clear_caches(on_disk=True)
273 return
274
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000275 try:
sbc@chromium.org013731e2015-02-26 18:28:43 +0000276 targets = git.parse_commitrefs(*(args or ['HEAD']))
277 except git.BadCommitRefException as e:
278 parser.error(e)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000279
sbc@chromium.org013731e2015-02-26 18:28:43 +0000280 load_generation_numbers(targets)
281 if not opts.no_cache:
282 finalize(targets)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000283
Raul Tambre80ee78e2019-05-06 22:41:05 +0000284 print('\n'.join(map(str, map(get_num, targets))))
sbc@chromium.org013731e2015-02-26 18:28:43 +0000285 return 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000286
287
288if __name__ == '__main__': # pragma: no cover
sbc@chromium.org013731e2015-02-26 18:28:43 +0000289 try:
290 sys.exit(main())
291 except KeyboardInterrupt:
292 sys.stderr.write('interrupted\n')
293 sys.exit(1)