blob: d900bd98c5a654f615822da161751a2c2529ec6b [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wue41e0062017-09-01 19:04:14 +08002# Copyright 2017 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Git utility."""
6
7from __future__ import print_function
8import logging
Kuang-che Wubfc4a642018-04-19 11:54:08 +08009import os
Kuang-che Wue41e0062017-09-01 19:04:14 +080010import re
Kuang-che Wu3d04eda2019-09-05 23:56:40 +080011import shutil
Kuang-che Wue41e0062017-09-01 19:04:14 +080012import subprocess
Kuang-che Wu2b1286b2019-05-20 20:37:26 +080013import time
Kuang-che Wue41e0062017-09-01 19:04:14 +080014
15from bisect_kit import cli
16from bisect_kit import util
17
18logger = logging.getLogger(__name__)
19
GIT_FULL_COMMIT_ID_LENGTH = 40

# Minimal acceptable length of git commit id.
#
# For chromium, hash collision rate over number of digits:
#  - 6 digits: 4.85%
#  - 7 digits: 0.32%
#  - 8 digits: 0.01%
# As foolproof check, 7 digits should be enough.
GIT_MIN_COMMIT_ID_LENGTH = 7


def is_git_rev(s):
  """Is a git hash-like version string.

  It accepts shortened hash with at least GIT_MIN_COMMIT_ID_LENGTH digits and
  at most GIT_FULL_COMMIT_ID_LENGTH digits. Only lowercase hex digits are
  accepted.

  Args:
    s: string to check.

  Returns:
    True if `s` consists solely of lowercase hex digits and its length is
    within the accepted range; False otherwise.
  """
  if not GIT_MIN_COMMIT_ID_LENGTH <= len(s) <= GIT_FULL_COMMIT_ID_LENGTH:
    return False
  # Anchor with \Z rather than $: '$' also matches right before a trailing
  # newline, which would let strings like 'abcdef0\n' pass as a hash.
  return bool(re.match(r'^[0-9a-f]+\Z', s))
40
41
def argtype_git_rev(s):
  """Argument type checker for git hashes.

  Args:
    s: string to validate.

  Returns:
    `s` unchanged if is_git_rev() accepts it.

  Raises:
    cli.ArgTypeError: if `s` is not a git hash-like string.
  """
  if is_git_rev(s):
    return s
  msg = 'should be git hash, at least %d digits' % GIT_MIN_COMMIT_ID_LENGTH
  raise cli.ArgTypeError(msg, '1a2b3c4d5e')
48
49
def is_git_root(path):
  """Tells whether `path` is the root of a git repo.

  Args:
    path: directory path to check.

  Returns:
    True if an entry named '.git' exists directly under `path`.
  """
  dot_git = os.path.join(path, '.git')
  return os.path.exists(dot_git)
53
54
def is_git_bare_dir(path):
  """Tells whether `path` is inside .git folder or a bare git checkout.

  Args:
    path: directory path to check.

  Returns:
    True if `path` is a directory and 'git rev-parse --is-bare-repository'
    run there prints 'true'. False if `path` is not a directory, the command
    fails, or it prints anything else.
  """
  if not os.path.isdir(path):
    return False
  try:
    output = util.check_output(
        'git', 'rev-parse', '--is-bare-repository', cwd=path)
  except subprocess.CalledProcessError:
    # The command failed; treat it as "not bare".
    return False
  return output == 'true\n'
64
65
def clone(git_repo, repo_url, reference=None):
  """git clone.

  git_repo and its parent directories will be created if they don't exist.
  The clone is performed into git_repo itself ('.' with cwd=git_repo).

  Args:
    git_repo: path of git repo (destination).
    repo_url: url (or path) of the repository to clone from.
    reference: if set, passed to 'git clone' via '--reference'.
  """
  if not os.path.exists(git_repo):
    os.makedirs(git_repo)

  reference_flags = ['--reference', reference] if reference else []
  util.check_call(
      'git', 'clone', repo_url, '.', *reference_flags, cwd=git_repo)
73
74
def checkout_version(git_repo, rev):
  """Quietly force-checkouts given revision ('git checkout -q -f').

  Args:
    git_repo: path of git repo.
    rev: git commit revision to checkout.
  """
  cmd = ['git', 'checkout', '-q', '-f', rev]
  util.check_call(*cmd, cwd=git_repo)
83
84
def init(git_repo):
  """Runs 'git init -q' inside git_repo.

  The directory git_repo, including missing parent directories, is created
  first if it doesn't exist yet.

  Args:
    git_repo: path of git repo.
  """
  repo_exists = os.path.exists(git_repo)
  if not repo_exists:
    os.makedirs(git_repo)

  cmd = ['git', 'init', '-q']
  util.check_call(*cmd, cwd=git_repo)
97
98
def commit_file(git_repo,
                path,
                message,
                content,
                commit_time=None,
                author_time=None):
  """Writes a file and commits it.

  The file (and its missing parent directories) is created under git_repo,
  overwritten with `content`, added, and then committed by itself.

  Args:
    git_repo: path of git repo
    path: file path, relative to git_repo
    message: commit message
    content: file content
    commit_time: commit timestamp; exported as GIT_COMMITTER_DATE if truthy
    author_time: author timestamp; defaults to commit_time if None. Exported
        as GIT_AUTHOR_DATE if truthy
  """
  if author_time is None:
    author_time = commit_time

  # Only timestamps that are set (truthy) are handed to 'git commit'.
  env = {}
  for name, value in (('GIT_AUTHOR_DATE', author_time),
                      ('GIT_COMMITTER_DATE', commit_time)):
    if value:
      env[name] = str(value)

  full_path = os.path.join(git_repo, path)
  parent_dir = os.path.dirname(full_path)
  if not os.path.exists(parent_dir):
    os.makedirs(parent_dir)
  with open(full_path, 'w') as output_file:
    output_file.write(content)

  util.check_call('git', 'add', path, cwd=git_repo)
  commit_cmd = ['git', 'commit', '-q', '-m', message, path]
  util.check_call(*commit_cmd, cwd=git_repo, env=env)
134
135
def config(git_repo, *args):
  """Runs 'git config' with given arguments inside git_repo.

  Args:
    git_repo: path of git repo.
    args: parameters pass to 'git config'
  """
  cmd = ['git', 'config']
  cmd.extend(args)
  util.check_call(*cmd, cwd=git_repo)
144
145
def fetch(git_repo, *args):
  """Wrapper of 'git fetch' with retry support.

  The command is attempted up to 5 times. A failed attempt is retried only if
  its stderr contains 'The requested URL returned error: 5' (i.e. HTTP 5xx
  server errors); any other failure is raised immediately. The delays before
  the retries are 20, 40, 60 and 60 seconds.

  Args:
    git_repo: path of git repo.
    args: parameters pass to 'git fetch'

  Raises:
    subprocess.CalledProcessError: 'git fetch' failed with a non-retryable
        error, or still failed at the last attempt.
  """
  max_tries = 5
  for tries in range(max_tries):
    if tries > 0:
      delay = min(60, 10 * 2**tries)
      logger.warning('git fetch failed, will retry %s seconds later', delay)
      time.sleep(delay)

    stderr_lines = []
    try:
      util.check_call(
          'git',
          'fetch',
          *args,
          cwd=git_repo,
          stderr_callback=stderr_lines.append)
      return
    except subprocess.CalledProcessError:
      stderr = ''.join(stderr_lines)
      # only retry 5xx internal server error
      if 'The requested URL returned error: 5' not in stderr:
        raise
      # Reached retry limit but haven't succeeded. Re-raise here, while the
      # exception is still being handled: a bare `raise` after the loop has
      # no active exception on Python 3 and would turn into RuntimeError.
      if tries == max_tries - 1:
        logger.error('git fetch failed too much times')
        raise
Kuang-che Wu1e49f512018-12-06 15:27:42 +0800180
181
def is_containing_commit(git_repo, rev):
  """Tells whether given commit exists in the repo.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    True if 'git cat-file -t' reports `rev` as a commit inside git_repo.
    False otherwise, including when the command fails or cannot be run at
    all (e.g. git_repo is not a git folder).
  """
  try:
    object_type = util.check_output(
        'git', 'cat-file', '-t', rev, cwd=git_repo)
  except (subprocess.CalledProcessError, OSError):
    return False
  return object_type == 'commit\n'
Kuang-che Wue41e0062017-09-01 19:04:14 +0800200
201
def is_ancestor_commit(git_repo, old, new):
  """Tells whether `old` commit is an ancestor of `new` commit.

  This is decided by whether 'git rev-list --ancestry-path -1 old..new'
  prints anything.

  Args:
    git_repo: path of git repo.
    old: the ancestor commit.
    new: the descendant commit.

  Returns:
    True only if `old` is the ancestor of `new`. One commit is not considered
    as ancestor of itself.
  """
  rev_range = '%s..%s' % (old, new)
  cmd = ['git', 'rev-list', '--ancestry-path', '-1', rev_range]
  output = util.check_output(*cmd, cwd=git_repo)
  return output != ''
221
222
def get_commit_metadata(git_repo, rev):
  """Get metadata of given commit.

  The metadata is parsed from the output of 'git cat-file -p': header lines
  first, then a blank line, then the commit message.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    dict of metadata, including (if available):
      tree: hash of git tree object
      parent: list of parent commits (more than one for merge commits); this
          field is unavailable for the very first commit of git repo.
      author: name and email of author
      author_time: author timestamp (without timezone information)
      committer: name and email of committer
      committer_time: commit timestamp (without timezone information)
      message: commit message text
  """
  meta = {}
  data = util.check_output(
      'git', 'cat-file', '-p', rev, cwd=git_repo, log_stdout=False)
  header, meta['message'] = data.split('\n\n', 1)
  for line in header.splitlines():
    m = re.match(r'^tree (\w+)', line)
    if m:
      meta['tree'] = m.group(1)
      continue

    m = re.match(r'^parent (\w+)', line)
    if m:
      # A commit object has one 'parent' line per parent, so accumulate them
      # instead of overwriting; otherwise only the last parent of a merge
      # commit would be reported.
      meta.setdefault('parent', []).append(m.group(1))
      continue

    m = re.match(r'^(author|committer) (.*) (\d+) (\S+)$', line)
    if m:
      meta[m.group(1)] = m.group(2)
      # The trailing timezone field (group 4) is dropped on purpose.
      meta['%s_time' % m.group(1)] = int(m.group(3))
      continue
  return meta
262
263
def get_revlist(git_repo, old, new):
  """Lists git commits between two revisions (inclusive), oldest first.

  Args:
    git_repo: path of git repo.
    old: git commit revision.
    new: git commit revision.

  Returns:
    list of git revisions. The list contains the input revisions, old and new.
  """
  assert old
  assert new
  # 'old^..new' starts from the parent of `old`, so `old` itself is included.
  rev_range = '%s^..%s' % (old, new)
  output = util.check_output(
      'git', 'rev-list', '--reverse', rev_range, cwd=git_repo)
  return output.splitlines()
279 return revlist
Kuang-che Wue2563ea2018-01-05 20:30:28 +0800280
281
def get_commit_log(git_repo, rev):
  """Get the commit message of given revision.

  Args:
    git_repo: path of git repo.
    rev: git commit revision.

  Returns:
    commit log message, as printed by 'git log -1 --format=%B'
  """
  return util.check_output(
      'git', 'log', '-1', '--format=%B', rev, cwd=git_repo)
295
296
def get_commit_hash(git_repo, rev):
  """Resolves `rev` to full git commit hash.

  Args:
    git_repo: path of git repo.
    rev: could be git tag, branch, or (shortened) commit hash

  Returns:
    full git commit hash

  Raises:
    ValueError: `rev` is not unique or doesn't exist
  """
  # Use '^{commit}' to restrict search only commits.
  # Use '--' to avoid ambiguity, like matching rev against path name.
  cmd = ['git', 'rev-parse', '%s^{commit}' % rev, '--']
  try:
    output = util.check_output(*cmd, cwd=git_repo)
  except subprocess.CalledProcessError:
    # Do not use 'git rev-parse --disambiguate' to determine uniqueness
    # because it searches objects other than commits as well.
    raise ValueError('%s is not unique or does not exist' % rev)

  # Drop the trailing newlines and dashes (the '--' given on the command line
  # shows up in the output).
  git_rev = output.rstrip('-\n')
  assert is_git_rev(git_rev)
  return git_rev
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800322
323
def get_commit_time(git_repo, rev, path=None):
  """Get git commit timestamp.

  Args:
    git_repo: path of git repo
    rev: git commit id, branch name, tag name, or other git object
    path: path, relative to git_repo. If set, the query is limited to the
        history of `path`.

  Returns:
    timestamp (int), parsed from 'git log -1 --format=%ct'
  """
  cmd = ['git', 'log', '-1', '--format=%ct', rev]
  if path:
    cmd.extend(['--', path])
  return int(util.check_output(*cmd, cwd=git_repo))
340
341
def get_file_from_revision(git_repo, rev, path):
  """Reads file content at given revision via 'git show rev:path'.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: file path

  Returns:
    file content (str)
  """
  blob_spec = '%s:%s' % (rev, path)
  return util.check_output(
      'git', 'show', blob_spec, cwd=git_repo, log_stdout=False)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800355
356
def list_dir_from_revision(git_repo, rev, path):
  """Lists entries of a directory at given revision.

  Uses 'git ls-tree --name-only rev:path'.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: directory path, relative to git root

  Returns:
    list of names

  Raises:
    subprocess.CalledProcessError: if `path` doesn't exists in `rev`
  """
  tree_spec = '%s:%s' % (rev, path)
  output = util.check_output(
      'git',
      'ls-tree',
      '--name-only',
      tree_spec,
      cwd=git_repo,
      log_stdout=False)
  return output.splitlines()
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800378
379
def get_rev_by_time(git_repo, timestamp, branch, path=None):
  """Finds the latest first-parent commit not later than given time.

  Args:
    git_repo: path of git repo.
    timestamp: timestamp
    branch: only query parent of the `branch`. If branch=None, it means 'HEAD'
        (current branch, usually).
    path: only query history of path, relative to git_repo

  Returns:
    git commit hash. None if path didn't exist at the given time.
  """
  start = branch if branch else 'HEAD'

  cmd = ['git', 'rev-list', '--first-parent', '-1']
  cmd.extend(['--before', str(timestamp)])
  cmd.append(start)
  if path:
    cmd.extend(['--', path])

  git_rev = util.check_output(*cmd, cwd=git_repo).strip()
  # Empty output means nothing matched.
  if not git_rev:
    return None
  return git_rev
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800410
411
def reset_hard(git_repo):
  """Restores modified and deleted files of the repo.

  Thin wrapper of "git reset --hard".

  Args:
    git_repo: path of git repo.
  """
  cmd = ['git', 'reset', '--hard']
  util.check_call(*cmd, cwd=git_repo)
421
422
def list_untracked(git_repo, excludes=None):
  """Lists untracked files and directories.

  Uses 'git ls-files --others --exclude-standard', plus one '--exclude' flag
  for each entry of `excludes`.

  Args:
    git_repo: path of git repo.
    excludes: files and/or directories to ignore, relative to git_repo

  Returns:
    list of paths, relative to git_repo
  """
  exclude_flags = []
  for exclude in excludes or []:
    assert not os.path.isabs(exclude), 'should be relative'
    # The pattern is rooted with a leading '/'. NOTE(review): re.escape is
    # presumably used to neutralize wildcard characters in the pattern.
    exclude_flags.extend(['--exclude', '/' + re.escape(exclude)])

  output = util.check_output(
      'git',
      'ls-files',
      '--others',
      '--exclude-standard',
      *exclude_flags,
      cwd=git_repo)
  # Remove the trailing slash, which means directory.
  return [entry.rstrip('/') for entry in output.splitlines()]
451
452
def distclean(git_repo, excludes=None):
  """Clean up git repo directory.

  Restore modified and deleted files (via reset_hard). Delete untracked files
  and directories reported by list_untracked.

  Args:
    git_repo: path of git repo.
    excludes: files and/or directories to ignore, relative to git_repo
  """
  reset_hard(git_repo)

  # Delete untracked files.
  for untracked in list_untracked(git_repo, excludes=excludes):
    path = os.path.join(git_repo, untracked)
    logger.debug('delete untracked: %s', path)
    # os.path.isdir() follows symlinks but shutil.rmtree() refuses to operate
    # on a symlink, so a symlink pointing to a directory must be unlinked
    # like a regular file (its target is left untouched).
    if os.path.isdir(path) and not os.path.islink(path):
      shutil.rmtree(path)
    else:
      os.unlink(path)
472
473
def get_history(git_repo,
                path=None,
                branch=None,
                after=None,
                before=None,
                padding=False,
                with_subject=False):
  """Get first-parent commit history of given path, oldest first.

  `after` and `before` could be outside of lifetime of `path`. `padding` is
  used to control what to return for such cases.

  Args:
    git_repo: path of git repo.
    path: path to query, relative to git_repo
    branch: branch name or ref name; must not look like a git commit hash
    after: limit history after given time (inclusive)
    before: limit history before given time (inclusive)
    padding: If True, pads returned result with dummy record at exact 'after'
        and 'before' time, if 'path' existed at that time. Otherwise, only
        returns real commits.
    with_subject: If True, return commit subject together

  Returns:
    List of (timestamp, git hash, subject); or (timestamp, git hash) depends
    on with_subject flag. They are all events when `path` was added, removed,
    modified, and start and end time if `padding` is true. If `padding` and
    `with_subject` are both true, 'dummy subject' will be returned as padding
    history's subject.

    For each pair, at `timestamp`, the repo state is `git hash`. In other
    words, `timestamp` is not necessary the commit time of `git hash` for the
    padded entries.
  """
  if with_subject:
    log_format = '%ct %H %s'
  else:
    log_format = '%ct %H'
  cmd = ['git', 'log', '--reverse', '--first-parent', '--format=' + log_format]
  if after:
    cmd.extend(['--after', str(after)])
  if before:
    cmd.extend(['--before', str(before)])
  if branch:
    assert not is_git_rev(branch)
    cmd.append(branch)
  if path:
    # '--' is necessary otherwise if `path` is removed in current revision, git
    # will complain it's an ambiguous argument which may be path or something
    # else (like git branch name, tag name, etc.)
    cmd.extend(['--', path])

  result = []
  for line in util.check_output(*cmd, cwd=git_repo).splitlines():
    # fields = [timestamp, git_rev, subject] or [timestamp, git_rev]
    fields = line.split(' ', 2)
    result.append((int(fields[0]),) + tuple(fields[1:]))

  if not padding:
    return result

  assert before or after, 'padding=True make no sense if they are both None'
  # Extra tuple members appended to padded (timestamp, git_rev) records.
  pad_tail = ('dummy subject',) if with_subject else ()

  # Pad the end: only if `path` existed at `before` and the last real commit
  # is not exactly at that time.
  if before is not None and get_rev_by_time(
      git_repo, before, branch, path=path):
    before = int(before)
    if not result or result[-1][0] != before:
      git_rev = get_rev_by_time(git_repo, before, branch)
      assert git_rev
      result.append((before, git_rev) + pad_tail)

  # Pad the beginning likewise for `after`.
  if after is not None and get_rev_by_time(git_repo, after, branch, path=path):
    after = int(after)
    if not result or result[0][0] != after:
      git_rev = get_rev_by_time(git_repo, after, branch)
      assert git_rev
      result.insert(0, (after, git_rev) + pad_tail)

  return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800554
555
def get_history_recursively(git_repo,
                            path,
                            after,
                            before,
                            parser_callback,
                            branch=None):
  """Get commit history of given path and its dependencies.

  In comparison to get_history(), get_history_recursively also takes
  dependencies into consideration. For example, if file A referenced file B,
  get_history_recursively(A) will return commits of B in addition to A. This
  applies recursively, so commits of C will be included if file B referenced
  file C, and so on.

  This function is file type neutral. `parser_callback(filename, content)` will
  be invoked to parse file content and should return list of filename of
  dependencies. If `parser_callback` returns None (usually syntax error), the
  commit is omitted.

  Args:
    git_repo: path of git repo
    path: path to query, relative to git_repo
    after: limit history after given time (inclusive)
    before: limit history before given time (inclusive)
    parser_callback: callback to parse file content. See above comment.
    branch: branch name or ref name

  Returns:
    list of (commit timestamp, git hash), sorted by timestamp with adjacent
    duplicates removed
  """
  history = get_history(
      git_repo, path, after=after, before=before, padding=True, branch=branch)

  # Map each dependency name to the set of revisions of `path` including it.
  includes = {}
  for _, git_rev in history:
    content = get_file_from_revision(git_repo, git_rev, path)
    parse_result = parser_callback(path, content)
    if parse_result is None:
      continue
    for include_name in parse_result:
      includes.setdefault(include_name, set()).add(git_rev)

  # Turn the sets into (name, start time, end time) intervals during which
  # the dependency was continuously included.
  dependencies = []
  for include, including_revs in includes.items():
    appeared = None
    for commit_time, git_rev in history:
      if git_rev in including_revs:
        if not appeared:
          appeared = commit_time
      elif appeared:
        # No longer included as of this commit; close the interval.
        dependencies.append((include, appeared, commit_time))
        appeared = None

    # Still included at the end of history; the interval extends to `before`.
    if appeared is not None:
      dependencies.append((include, appeared, before))

  # Recursion and merge.
  merged = list(history)
  for include, appeared, disappeared in dependencies:
    merged.extend(
        get_history_recursively(
            git_repo,
            include,
            appeared,
            disappeared,
            parser_callback,
            branch=branch))

  # Sort (by timestamp only, stable) and drop adjacent duplicates.
  merged.sort(key=lambda entry: entry[0])
  deduped = []
  for entry in merged:
    if not deduped or deduped[-1] != entry:
      deduped.append(entry)

  return deduped
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800636
637
def get_branches(git_repo, all_branches=True, commit=None):
  """Lists branches of a repository.

  Uses 'git branch --format=%(refname)', so the returned names are ref names.

  Args:
    git_repo: path of git repo
    all_branches: return remote branches if is set to True
    commit: return branches containing this commit if is not None

  Returns:
    list of branch names
  """
  cmd = ['git', 'branch', '--format=%(refname)']
  if all_branches:
    cmd.append('-a')
  if commit:
    cmd.extend(['--contains', commit])

  output = util.check_output(*cmd, cwd=git_repo)
  return [line.strip() for line in output.splitlines()]
659
660
def list_commits_between_commits(git_repo, old, new):
  """Get all first-parent commits between (old, new], oldest first.

  `old` must be equal to `new` or an ancestor of `new`.

  Args:
    git_repo: path of git repo.
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of [timestamp, rev] entries. Timestamps are adjusted to be
    non-decreasing if the real commit timestamps are not.
  """
  assert old and new
  assert old == new or is_ancestor_commit(git_repo, old, new)

  # --first-parent is necessary for Android, see following link for more
  # discussion.
  # https://docs.google.com/document/d/1c8qiq14_ObRRjLT62sk9r5V5cyCGHX66dLYab4MVnks/edit#heading=h.n3i6mt2n6xuu
  output = util.check_output(
      'git',
      'rev-list',
      '--timestamp',
      '--reverse',
      '--first-parent',
      '%s..%s' % (old, new),
      cwd=git_repo)
  commits = []
  for line in output.splitlines():
    timestamp, git_rev = line.split()
    commits.append([int(timestamp), git_rev])

  # bisect-kit has a fundamental assumption that commit timestamps are
  # increasing because we sort and bisect the commits by timestamp across git
  # repos. If not increasing, we have to adjust the timestamp as workaround.
  # This might lead to bad bisect result, however the bad probability is low in
  # practice since most machines' clocks are good enough.
  if commits == sorted(commits, key=lambda entry: entry[0]):
    return commits

  logger.warning('Commit timestamps are not increasing')
  # Raise every timestamp that goes backwards up to the largest one seen.
  floor = -1
  adjusted = 0
  for commit in commits:
    if commit[0] < floor:
      commit[0] = floor
      adjusted += 1
    else:
      floor = commit[0]
  logger.warning('%d timestamps adjusted', adjusted)

  return commits
706 return commits