blob: 1867b97d7ca58b68856fa495cd303ca6907bbe10 [file] [log] [blame]
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +00001#!/usr/bin/env python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Usage: %prog [options] [<commitref>]*
7
8If no <commitref>'s are supplied, it defaults to HEAD.
9
10Calculates the generation number for one or more commits in a git repo.
11
12Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16This number can be used to order commits relative to each other, as long as for
17any pair of the commits, one is an ancestor of the other.
18
19Since calculating the generation number of a commit requires walking that
20commit's entire history, this script caches all calculated data inside the git
21repo that it operates on in the ref 'refs/number/commits'.
22"""
23
24import binascii
25import collections
26import logging
27import optparse
28import os
29import struct
30import sys
31import tempfile
32
33import git_common as git
34import subprocess2
35
36CHUNK_FMT = '!20sL'
37CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38DIRTY_TREES = collections.defaultdict(int)
39REF = 'refs/number/commits'
nodir@chromium.orgee740702014-04-03 01:43:32 +000040AUTHOR_NAME = 'git-number'
41AUTHOR_EMAIL = 'chrome-infrastructure-team@google.com'
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +000042
43# Number of bytes to use for the prefix on our internal number structure.
44# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
45# need to reimplement cache data structures to be a bit more sophisticated than
46# dicts. 1 seems to be just right.
47PREFIX_LEN = 1
48
49# Set this to 'threads' to gather coverage data while testing.
50POOL_KIND = 'procs'
51
52
53def pathlify(hash_prefix):
54 """Converts a binary object hash prefix into a posix path, one folder per
55 byte.
56
57 >>> pathlify('\xDE\xAD')
58 'de/ad'
59 """
60 return '/'.join('%02x' % ord(b) for b in hash_prefix)
61
62
63@git.memoize_one(threadsafe=False)
64def get_number_tree(prefix_bytes):
65 """Returns a dictionary of the git-number registry specified by
66 |prefix_bytes|.
67
68 This is in the form of {<full binary ref>: <gen num> ...}
69
70 >>> get_number_tree('\x83\xb4')
71 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
72 """
73 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
74
75 try:
76 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
77 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
78 for i in xrange(len(raw) / CHUNK_SIZE))
79 except subprocess2.CalledProcessError:
80 return {}
81
82
83@git.memoize_one(threadsafe=False)
84def get_num(commit_hash):
85 """Returns the generation number for a commit.
86
87 Returns None if the generation number for this commit hasn't been calculated
88 yet (see load_generation_numbers()).
89 """
90 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
91
92
93def clear_caches(on_disk=False):
94 """Clears in-process caches for e.g. unit testing."""
95 get_number_tree.clear()
96 get_num.clear()
97 if on_disk:
98 git.run('update-ref', '-d', REF)
99
100
101def intern_number_tree(tree):
102 """Transforms a number tree (in the form returned by |get_number_tree|) into
103 a git blob.
104
105 Returns the git blob id as hex-encoded string.
106
107 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
108 >>> intern_number_tree(d)
109 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
110 """
111 with tempfile.TemporaryFile() as f:
112 for k, v in sorted(tree.iteritems()):
113 f.write(struct.pack(CHUNK_FMT, k, v))
114 f.seek(0)
115 return git.intern_f(f)
116
117
118def leaf_map_fn((pre, tree)):
119 """Converts a prefix and number tree into a git index line."""
120 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
121
122
123def finalize(targets):
124 """Saves all cache data to the git repository.
125
126 After calculating the generation number for |targets|, call finalize() to
127 save all the work to the git repository.
128
129 This in particular saves the trees referred to by DIRTY_TREES.
130 """
131 if not DIRTY_TREES:
132 return
133
134 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
135
136 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
137 env = os.environ.copy()
138 env['GIT_INDEX_FILE'] = idx
139
140 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
141 with git.ProgressPrinter(progress_message) as inc:
142 git.run('read-tree', REF, env=env)
143
144 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
145 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
146 stdin=subprocess2.PIPE, env=env)
147
148 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
149 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
150 updater.stdin.write(item)
151 inc()
152
153 updater.stdin.close()
154 updater.wait()
155 assert updater.returncode == 0
156
157 tree_id = git.run('write-tree', env=env)
nodir@chromium.orgee740702014-04-03 01:43:32 +0000158 commit_cmd = [
159 # Git user.name and/or user.email may not be configured, so specifying
160 # them explicitly. They are not used, but requried by Git.
161 '-c', 'user.name=%s' % AUTHOR_NAME,
162 '-c', 'user.email=%s' % AUTHOR_EMAIL,
163 'commit-tree',
164 '-m', msg,
165 '-p'] + git.hash_multi(REF)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000166 for t in targets:
167 commit_cmd.extend(['-p', binascii.hexlify(t)])
168 commit_cmd.append(tree_id)
169 commit_hash = git.run(*commit_cmd)
170 git.run('update-ref', REF, commit_hash)
171 DIRTY_TREES.clear()
172
173
174def preload_tree(prefix):
175 """Returns the prefix and parsed tree object for the specified prefix."""
176 return prefix, get_number_tree(prefix)
177
178
179def all_prefixes(depth=PREFIX_LEN):
180 for x in (chr(i) for i in xrange(255)):
181 # This isn't covered because PREFIX_LEN currently == 1
182 if depth > 1: # pragma: no cover
183 for r in all_prefixes(depth - 1):
184 yield x + r
185 else:
186 yield x
187
188
189def load_generation_numbers(targets):
190 """Populates the caches of get_num and get_number_tree so they contain
191 the results for |targets|.
192
193 Loads cached numbers from disk, and calculates missing numbers if one or
194 more of |targets| is newer than the cached calculations.
195
196 Args:
197 targets - An iterable of binary-encoded full git commit hashes.
198 """
199 # In case they pass us a generator, listify targets.
200 targets = list(targets)
201
202 if all(get_num(t) is not None for t in targets):
203 return
204
205 if git.tree(REF) is None:
206 empty = git.mktree({})
207 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
208 empty)
209 git.run('update-ref', REF, commit_hash)
210
211 with git.ScopedPool(kind=POOL_KIND) as pool:
212 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
213
214 rev_list = []
215
216 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
217 # Curiously, buffering the list into memory seems to be the fastest
218 # approach in python (as opposed to iterating over the lines in the
219 # stdout as they're produced). GIL strikes again :/
220 cmd = [
221 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
222 ] + map(binascii.hexlify, targets)
223 for line in git.run(*cmd).splitlines():
224 tokens = map(binascii.unhexlify, line.split())
225 rev_list.append((tokens[0], tokens[1:]))
226 inc()
227
228 get_number_tree.update(preload_iter)
229
230 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
231 for commit_hash, pars in rev_list:
232 num = max(map(get_num, pars)) + 1 if pars else 0
233
234 prefix = commit_hash[:PREFIX_LEN]
235 get_number_tree(prefix)[commit_hash] = num
236 DIRTY_TREES[prefix] += 1
237 get_num.set(commit_hash, num)
238
239 inc()
240
241
242def main(): # pragma: no cover
243 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
244 parser.add_option('--no-cache', action='store_true',
245 help='Do not actually cache anything we calculate.')
246 parser.add_option('--reset', action='store_true',
247 help='Reset the generation number cache and quit.')
248 parser.add_option('-v', '--verbose', action='count', default=0,
249 help='Be verbose. Use more times for more verbosity.')
250 opts, args = parser.parse_args()
251
252 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
253 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
254
dnj@chromium.orge57a6eb2014-09-02 20:49:59 +0000255 # 'git number' should only be used on bots.
256 if os.getenv('CHROME_HEADLESS') != '1':
257 logging.error("'git-number' is an infrastructure tool that is only "
258 "intended to be used internally by bots. Developers should "
259 "use the 'Cr-Commit-Position' value in the commit's message.")
260 return 1
261
sbc@chromium.org013731e2015-02-26 18:28:43 +0000262 if opts.reset:
263 clear_caches(on_disk=True)
264 return
265
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000266 try:
sbc@chromium.org013731e2015-02-26 18:28:43 +0000267 targets = git.parse_commitrefs(*(args or ['HEAD']))
268 except git.BadCommitRefException as e:
269 parser.error(e)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000270
sbc@chromium.org013731e2015-02-26 18:28:43 +0000271 load_generation_numbers(targets)
272 if not opts.no_cache:
273 finalize(targets)
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000274
sbc@chromium.org013731e2015-02-26 18:28:43 +0000275 print '\n'.join(map(str, map(get_num, targets)))
276 return 0
iannucci@chromium.orgaa74cf62013-11-19 20:00:49 +0000277
278
279if __name__ == '__main__': # pragma: no cover
sbc@chromium.org013731e2015-02-26 18:28:43 +0000280 try:
281 sys.exit(main())
282 except KeyboardInterrupt:
283 sys.stderr.write('interrupted\n')
284 sys.exit(1)