blob: 0d89d657266fd758fe56868d7a055f8403e26397 [file] [log] [blame]
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00001#!/usr/bin/env python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Usage: %prog [options] [<commitref>]*
7
8If no <commitref>'s are supplied, it defaults to HEAD.
9
10Calculates the generation number for one or more commits in a git repo.
11
12Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16This number can be used to order commits relative to each other, as long as for
17any pair of the commits, one is an ancestor of the other.
18
19Since calculating the generation number of a commit requires walking that
20commit's entire history, this script caches all calculated data inside the git
21repo that it operates on in the ref 'refs/number/commits'.
22"""
23
24import binascii
25import collections
26import logging
27import optparse
28import os
29import struct
30import sys
31import tempfile
32
33import git_common as git
34import subprocess2
35
36CHUNK_FMT = '!20sL'
37CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38DIRTY_TREES = collections.defaultdict(int)
39REF = 'refs/number/commits'
nodir@chromium.orgee740702014-04-03 01:43:32 +000040AUTHOR_NAME = 'git-number'
41AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000042
43# Number of bytes to use for the prefix on our internal number structure.
44# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
45# need to reimplement cache data structures to be a bit more sophisticated than
46# dicts. 1 seems to be just right.
47PREFIX_LEN = 1
48
49# Set this to 'threads' to gather coverage data while testing.
50POOL_KIND = 'procs'
51
52
53def pathlify(hash_prefix):
54 """Converts a binary object hash prefix into a posix path, one folder per
55 byte.
56
57 >>> pathlify('\xDE\xAD')
58 'de/ad'
59 """
60 return '/'.join('%02x' % ord(b) for b in hash_prefix)
61
62
63@git.memoize_one(threadsafe=False)
64def get_number_tree(prefix_bytes):
65 """Returns a dictionary of the git-number registry specified by
66 |prefix_bytes|.
67
68 This is in the form of {<full binary ref>: <gen num> ...}
69
70 >>> get_number_tree('\x83\xb4')
71 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
72 """
73 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
74
75 try:
76 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
77 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
78 for i in xrange(len(raw) / CHUNK_SIZE))
79 except subprocess2.CalledProcessError:
80 return {}
81
82
83@git.memoize_one(threadsafe=False)
84def get_num(commit_hash):
85 """Returns the generation number for a commit.
86
87 Returns None if the generation number for this commit hasn't been calculated
88 yet (see load_generation_numbers()).
89 """
90 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
91
92
93def clear_caches(on_disk=False):
94 """Clears in-process caches for e.g. unit testing."""
95 get_number_tree.clear()
96 get_num.clear()
97 if on_disk:
98 git.run('update-ref', '-d', REF)
99
100
101def intern_number_tree(tree):
102 """Transforms a number tree (in the form returned by |get_number_tree|) into
103 a git blob.
104
105 Returns the git blob id as hex-encoded string.
106
107 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
108 >>> intern_number_tree(d)
109 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
110 """
111 with tempfile.TemporaryFile() as f:
112 for k, v in sorted(tree.iteritems()):
113 f.write(struct.pack(CHUNK_FMT, k, v))
114 f.seek(0)
115 return git.intern_f(f)
116
117
118def leaf_map_fn((pre, tree)):
119 """Converts a prefix and number tree into a git index line."""
120 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
121
122
123def finalize(targets):
124 """Saves all cache data to the git repository.
125
126 After calculating the generation number for |targets|, call finalize() to
127 save all the work to the git repository.
128
129 This in particular saves the trees referred to by DIRTY_TREES.
130 """
131 if not DIRTY_TREES:
132 return
133
134 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
135
136 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
137 env = os.environ.copy()
138 env['GIT_INDEX_FILE'] = idx
139
140 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
141 with git.ProgressPrinter(progress_message) as inc:
142 git.run('read-tree', REF, env=env)
143
144 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
145 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
146 stdin=subprocess2.PIPE, env=env)
147
148 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
149 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
150 updater.stdin.write(item)
151 inc()
152
153 updater.stdin.close()
154 updater.wait()
155 assert updater.returncode == 0
156
157 tree_id = git.run('write-tree', env=env)
nodir@chromium.orgee740702014-04-03 01:43:32 +0000158 commit_cmd = [
159 # Git user.name and/or user.email may not be configured, so specifying
160 # them explicitly. They are not used, but requried by Git.
161 '-c', 'user.name=%s' % AUTHOR_NAME,
162 '-c', 'user.email=%s' % AUTHOR_EMAIL,
163 'commit-tree',
164 '-m', msg,
165 '-p'] + git.hash_multi(REF)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000166 for t in targets:
167 commit_cmd.extend(['-p', binascii.hexlify(t)])
168 commit_cmd.append(tree_id)
169 commit_hash = git.run(*commit_cmd)
170 git.run('update-ref', REF, commit_hash)
171 DIRTY_TREES.clear()
172
173
174def preload_tree(prefix):
175 """Returns the prefix and parsed tree object for the specified prefix."""
176 return prefix, get_number_tree(prefix)
177
178
179def all_prefixes(depth=PREFIX_LEN):
180 for x in (chr(i) for i in xrange(255)):
181 # This isn't covered because PREFIX_LEN currently == 1
182 if depth > 1: # pragma: no cover
183 for r in all_prefixes(depth - 1):
184 yield x + r
185 else:
186 yield x
187
188
189def load_generation_numbers(targets):
190 """Populates the caches of get_num and get_number_tree so they contain
191 the results for |targets|.
192
193 Loads cached numbers from disk, and calculates missing numbers if one or
194 more of |targets| is newer than the cached calculations.
195
196 Args:
197 targets - An iterable of binary-encoded full git commit hashes.
198 """
199 # In case they pass us a generator, listify targets.
200 targets = list(targets)
201
202 if all(get_num(t) is not None for t in targets):
203 return
204
205 if git.tree(REF) is None:
206 empty = git.mktree({})
John Budorickbcec9e72017-06-01 07:42:07 -0700207 commit_hash = git.run(
208 # Git user.name and/or user.email may not be configured, so specifying
209 # them explicitly. They are not used, but requried by Git.
210 '-c', 'user.name=%s' % AUTHOR_NAME,
211 '-c', 'user.email=%s' % AUTHOR_EMAIL,
212 'commit-tree',
213 '-m', 'Initial commit from git-number',
214 empty)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000215 git.run('update-ref', REF, commit_hash)
216
217 with git.ScopedPool(kind=POOL_KIND) as pool:
218 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
219
220 rev_list = []
221
222 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
223 # Curiously, buffering the list into memory seems to be the fastest
224 # approach in python (as opposed to iterating over the lines in the
225 # stdout as they're produced). GIL strikes again :/
226 cmd = [
227 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
228 ] + map(binascii.hexlify, targets)
229 for line in git.run(*cmd).splitlines():
230 tokens = map(binascii.unhexlify, line.split())
231 rev_list.append((tokens[0], tokens[1:]))
232 inc()
233
234 get_number_tree.update(preload_iter)
235
236 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
237 for commit_hash, pars in rev_list:
238 num = max(map(get_num, pars)) + 1 if pars else 0
239
240 prefix = commit_hash[:PREFIX_LEN]
241 get_number_tree(prefix)[commit_hash] = num
242 DIRTY_TREES[prefix] += 1
243 get_num.set(commit_hash, num)
244
245 inc()
246
247
248def main(): # pragma: no cover
249 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
250 parser.add_option('--no-cache', action='store_true',
251 help='Do not actually cache anything we calculate.')
252 parser.add_option('--reset', action='store_true',
253 help='Reset the generation number cache and quit.')
254 parser.add_option('-v', '--verbose', action='count', default=0,
255 help='Be verbose. Use more times for more verbosity.')
256 opts, args = parser.parse_args()
257
258 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
259 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
260
dnj@chromium.orge57a6eb2014-09-02 20:49:59 +0000261 # 'git number' should only be used on bots.
262 if os.getenv('CHROME_HEADLESS') != '1':
263 logging.error("'git-number' is an infrastructure tool that is only "
264 "intended to be used internally by bots. Developers should "
265 "use the 'Cr-Commit-Position' value in the commit's message.")
266 return 1
267
sbc@chromium.org013731e2015-02-26 18:28:43 +0000268 if opts.reset:
269 clear_caches(on_disk=True)
270 return
271
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000272 try:
sbc@chromium.org013731e2015-02-26 18:28:43 +0000273 targets = git.parse_commitrefs(*(args or ['HEAD']))
274 except git.BadCommitRefException as e:
275 parser.error(e)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000276
sbc@chromium.org013731e2015-02-26 18:28:43 +0000277 load_generation_numbers(targets)
278 if not opts.no_cache:
279 finalize(targets)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000280
sbc@chromium.org013731e2015-02-26 18:28:43 +0000281 print '\n'.join(map(str, map(get_num, targets)))
282 return 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000283
284
285if __name__ == '__main__': # pragma: no cover
sbc@chromium.org013731e2015-02-26 18:28:43 +0000286 try:
287 sys.exit(main())
288 except KeyboardInterrupt:
289 sys.stderr.write('interrupted\n')
290 sys.exit(1)