blob: 21a2031bf117afccd865e5d0d36159f2ff79acb0 [file] [log] [blame]
Kuang-che Wu6e4beca2018-06-27 17:45:02 +08001# -*- coding: utf-8 -*-
Kuang-che Wue41e0062017-09-01 19:04:14 +08002# Copyright 2017 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Git utility."""
6
7from __future__ import print_function
8import logging
Kuang-che Wubfc4a642018-04-19 11:54:08 +08009import os
Kuang-che Wue41e0062017-09-01 19:04:14 +080010import re
Kuang-che Wu3d04eda2019-09-05 23:56:40 +080011import shutil
Kuang-che Wue41e0062017-09-01 19:04:14 +080012import subprocess
Kuang-che Wu2b1286b2019-05-20 20:37:26 +080013import time
Kuang-che Wue41e0062017-09-01 19:04:14 +080014
15from bisect_kit import cli
16from bisect_kit import util
17
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Length (number of hex digits) of a full, unabbreviated git commit id.
GIT_FULL_COMMIT_ID_LENGTH = 40

# Minimal acceptable length of git commit id.
#
# For chromium, hash collision rate over number of digits:
# - 6 digits: 4.85%
# - 7 digits: 0.32%
# - 8 digits: 0.01%
# As foolproof check, 7 digits should be enough.
GIT_MIN_COMMIT_ID_LENGTH = 7
30
31
def is_git_rev(s):
    """Is a git hash-like version string.

    It accepts shortened hash with at least GIT_MIN_COMMIT_ID_LENGTH digits
    and at most GIT_FULL_COMMIT_ID_LENGTH digits. Only lowercase hex digits
    are accepted.

    Args:
        s: string to check.

    Returns:
        True if `s` looks like a (possibly shortened) git commit id.
    """
    if not GIT_MIN_COMMIT_ID_LENGTH <= len(s) <= GIT_FULL_COMMIT_ID_LENGTH:
        return False
    # Anchor with \Z rather than $: '$' also matches right before a trailing
    # newline, which would wrongly accept strings such as '1a2b3c4\n'.
    return bool(re.match(r'[0-9a-f]+\Z', s))
40
41
def argtype_git_rev(s):
    """Argument type checker for git hashes.

    Args:
        s: string given as argument.

    Returns:
        `s` itself, unchanged, if it passes is_git_rev().

    Raises:
        cli.ArgTypeError: if `s` is not a git hash-like string.
    """
    if is_git_rev(s):
        return s
    # The second argument is presumably an example of valid value shown to
    # the user -- confirm against cli.ArgTypeError.
    raise cli.ArgTypeError(
        'should be git hash, at least %d digits' % GIT_MIN_COMMIT_ID_LENGTH,
        '1a2b3c4d5e')
48
49
def is_git_root(path):
    """Tell whether `path` is the root of a git repo.

    Only the existence of an entry named '.git' directly under `path` is
    checked.

    Args:
        path: directory path to check.

    Returns:
        True if `path`/.git exists.
    """
    dot_git = os.path.join(path, '.git')
    return os.path.exists(dot_git)
53
54
def is_git_bare_dir(path):
    """Is inside .git folder or bare git checkout.

    Args:
        path: directory path to check.

    Returns:
        True only if `path` is a directory and
        'git rev-parse --is-bare-repository' run there prints 'true'. False
        if `path` is not a directory or the git command fails.
    """
    if os.path.isdir(path):
        try:
            answer = util.check_output(
                'git', 'rev-parse', '--is-bare-repository', cwd=path)
        except subprocess.CalledProcessError:
            return False
        return answer == 'true\n'
    return False
64
65
def clone(git_repo, repo_url, reference=None):
    """git clone.

    git_repo and its parent directories will be created if they don't exist.
    The clone is done into `git_repo` itself (cloned to '.' with `git_repo`
    as working directory).

    Args:
        git_repo: path of the local git repo to create.
        repo_url: url of the repo to clone from.
        reference: if specified, it is passed to 'git clone --reference'.
    """
    if not os.path.exists(git_repo):
        os.makedirs(git_repo)
    reference_flags = ['--reference', reference] if reference else []
    util.check_call(
        'git', 'clone', repo_url, '.', *reference_flags, cwd=git_repo)
73
74
def checkout_version(git_repo, rev):
    """Check out the given revision ('git checkout -q -f').

    Args:
        git_repo: path of git repo.
        rev: git commit revision to checkout.
    """
    cmd = ['git', 'checkout', '-q', '-f', rev]
    util.check_call(*cmd, cwd=git_repo)
83
84
def init(git_repo):
    """Run 'git init' at the given path.

    git_repo and its parent directories will be created if they don't exist.

    Args:
        git_repo: path of git repo.
    """
    already_there = os.path.exists(git_repo)
    if not already_there:
        os.makedirs(git_repo)

    cmd = ['git', 'init', '-q']
    util.check_call(*cmd, cwd=git_repo)
97
98
def commit_file(git_repo,
                path,
                message,
                content,
                commit_time=None,
                author_time=None):
    """Write a file and commit it.

    The parent directory of the file is created if it doesn't exist. The file
    is (over)written with `content`, then 'git add' and 'git commit' are run
    for that single path.

    Args:
        git_repo: path of git repo
        path: file path, relative to git_repo
        message: commit message
        content: file content
        commit_time: commit timestamp; exported as GIT_COMMITTER_DATE if set
        author_time: author timestamp; exported as GIT_AUTHOR_DATE if set.
            Defaults to `commit_time` when None.
    """
    if author_time is None:
        author_time = commit_time

    # Only export the dates which are actually specified (truthy).
    env = {}
    for env_name, timestamp in (('GIT_AUTHOR_DATE', author_time),
                                ('GIT_COMMITTER_DATE', commit_time)):
        if timestamp:
            env[env_name] = str(timestamp)

    full_path = os.path.join(git_repo, path)
    parent_dir = os.path.dirname(full_path)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    with open(full_path, 'w') as f:
        f.write(content)

    util.check_call('git', 'add', path, cwd=git_repo)
    commit_cmd = ['git', 'commit', '-q', '-m', message, path]
    util.check_call(*commit_cmd, cwd=git_repo, env=env)
134
135
def config(git_repo, *args):
    """Run 'git config' inside the given repo.

    Args:
        git_repo: path of git repo.
        args: parameters pass to 'git config'
    """
    cmd = ('git', 'config') + args
    util.check_call(*cmd, cwd=git_repo)
144
145
def fetch(git_repo, *args):
    """Wrapper of 'git fetch' with retry support.

    'git fetch' is attempted at most 5 times. A failed attempt is retried only
    if its stderr contains 'The requested URL returned error: 5' (HTTP 5xx
    server error); any other failure is raised immediately. The delay before
    a retry grows exponentially and is capped at 60 seconds.

    Args:
        git_repo: path of git repo.
        args: parameters pass to 'git fetch'

    Raises:
        subprocess.CalledProcessError: 'git fetch' failed with non-retryable
            error, or it still failed after all the retries.
    """
    last_error = None
    for tries in range(5):
        if tries > 0:
            delay = min(60, 10 * 2**tries)
            logger.warning('git fetch failed, will retry %s seconds later',
                           delay)
            time.sleep(delay)

        stderr_lines = []
        try:
            util.check_call(
                'git',
                'fetch',
                *args,
                cwd=git_repo,
                stderr_callback=stderr_lines.append)
            return
        except subprocess.CalledProcessError as e:
            stderr = ''.join(stderr_lines)
            # only retry 5xx internal server error
            if 'The requested URL returned error: 5' not in stderr:
                raise
            # Remember the failure explicitly. In Python 3 the active
            # exception is cleared when the except block ends, so a bare
            # `raise` after the loop would fail with RuntimeError instead of
            # re-raising the fetch error.
            last_error = e

    # Reached retry limit but haven't succeeded, so `last_error` must have
    # been set by the last iteration of above loop.
    logger.error('git fetch failed too much times')
    raise last_error  # pylint: disable=raising-bad-type
Kuang-che Wu1e49f512018-12-06 15:27:42 +0800180
181
def is_containing_commit(git_repo, rev):
    """Check whether the given commit exists in the repo.

    Args:
        git_repo: path of git repo.
        rev: git commit revision in query.

    Returns:
        True if 'git cat-file -t' reports `rev` as a commit inside given git
        repo. If the git command fails or cannot be run (e.g. git_repo is not
        a git folder), returns False as well.
    """
    try:
        object_type = util.check_output(
            'git', 'cat-file', '-t', rev, cwd=git_repo)
    except (subprocess.CalledProcessError, OSError):
        return False
    return object_type == 'commit\n'
Kuang-che Wue41e0062017-09-01 19:04:14 +0800200
201
def is_ancestor_commit(git_repo, old, new):
    """Check whether `old` commit is an ancestor of `new` commit.

    Args:
        git_repo: path of git repo.
        old: the ancestor commit.
        new: the descendant commit.

    Returns:
        True only if `old` is the ancestor of `new`. One commit is not
        considered as ancestor of itself.
    """
    rev_range = '%s..%s' % (old, new)
    # Any output at all means there is at least one commit on the ancestry
    # path between `old` and `new`.
    output = util.check_output(
        'git', 'rev-list', '--ancestry-path', '-1', rev_range, cwd=git_repo)
    return output != ''
221
222
def get_commit_metadata(git_repo, rev):
    """Get metadata of given commit.

    The metadata is parsed from the output of 'git cat-file -p'.

    Args:
        git_repo: path of git repo.
        rev: git commit revision in query.

    Returns:
        dict of metadata, including (if available):
            tree: hash of git tree object
            parent: list of parent commits (more than one for merge commits);
                this field is unavailable for the very first commit of git
                repo.
            author: name and email of author
            author_time: author timestamp (without timezone information)
            committer: name and email of committer
            committer_time: commit timestamp (without timezone information)
            message: commit message text
    """
    meta = {}
    data = util.check_output(
        'git', 'cat-file', '-p', rev, cwd=git_repo, log_stdout=False)
    # The header and the commit message are separated by the first blank line.
    header, meta['message'] = data.split('\n\n', 1)
    for line in header.splitlines():
        m = re.match(r'^tree (\w+)', line)
        if m:
            meta['tree'] = m.group(1)
            continue

        m = re.match(r'^parent (\w+)', line)
        if m:
            # There is one 'parent' line per parent, so a merge commit has
            # several of them. Accumulate instead of overwriting; otherwise
            # only the last parent would be reported.
            meta.setdefault('parent', []).extend(line.split()[1:])
            continue

        m = re.match(r'^(author|committer) (.*) (\d+) (\S+)$', line)
        if m:
            # group(2) is the identity, group(3) is the timestamp; the
            # trailing timezone field, group(4), is dropped.
            meta[m.group(1)] = m.group(2)
            meta['%s_time' % m.group(1)] = int(m.group(3))
            continue
    return meta
262
263
def get_revlist(git_repo, old, new):
    """List git commits between two revisions (inclusive).

    The commits are enumerated by 'git rev-list --reverse old^..new'.

    Args:
        git_repo: path of git repo.
        old: git commit revision.
        new: git commit revision.

    Returns:
        list of git revisions. The list contains the input revisions, old and
        new.
    """
    assert old
    assert new
    # 'old^' is used as the lower bound in order to include `old` itself.
    rev_range = '%s^..%s' % (old, new)
    output = util.check_output(
        'git', 'rev-list', '--reverse', rev_range, cwd=git_repo)
    return output.splitlines()
Kuang-che Wue2563ea2018-01-05 20:30:28 +0800280
281
def get_commit_log(git_repo, rev):
    """Get the commit log message of given revision.

    Args:
        git_repo: path of git repo.
        rev: git commit revision.

    Returns:
        commit log message, as printed by 'git log -1 --format=%B'
    """
    return util.check_output(
        'git', 'log', '-1', '--format=%B', rev, cwd=git_repo)
295
296
def get_commit_hash(git_repo, rev):
    """Resolve a revision to its full git commit hash.

    Args:
        git_repo: path of git repo.
        rev: could be git tag, branch, or (shortened) commit hash

    Returns:
        full git commit hash

    Raises:
        ValueError: `rev` is not unique or doesn't exist
    """
    # Use '^{commit}' to restrict search only commits.
    query = '%s^{commit}' % rev
    try:
        # Use '--' to avoid ambiguity, like matching rev against path name.
        output = util.check_output(
            'git', 'rev-parse', query, '--', cwd=git_repo)
    except subprocess.CalledProcessError:
        # Do not use 'git rev-parse --disambiguate' to determine uniqueness
        # because it searches objects other than commits as well.
        raise ValueError('%s is not unique or does not exist' % rev)

    # Strip trailing newlines and dashes (presumably the '--' is echoed back
    # by git after the hash).
    git_rev = output.rstrip('-\n')
    assert is_git_rev(git_rev)
    return git_rev
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800322
323
def get_commit_time(git_repo, rev, path):
    """Get commit timestamp of the latest commit of `path`, up to `rev`.

    Args:
        git_repo: path of git repo
        rev: git commit id, branch name, tag name, or other git object
        path: path, relative to git_repo

    Returns:
        timestamp (int)
    """
    cmd = ['git', 'log', '-1', '--format=%ct', rev, '--', path]
    output = util.check_output(*cmd, cwd=git_repo)
    return int(output)
338
339
def get_file_from_revision(git_repo, rev, path):
    """Read the content of a file as of given revision.

    Args:
        git_repo: path of git repo
        rev: git commit id
        path: file path

    Returns:
        file content (str), i.e. output of 'git show rev:path'
    """
    blob_spec = '%s:%s' % (rev, path)
    return util.check_output(
        'git', 'show', blob_spec, cwd=git_repo, log_stdout=False)
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800353
354
def list_dir_from_revision(git_repo, rev, path):
    """List entries of a directory as of given revision.

    Args:
        git_repo: path of git repo
        rev: git commit id
        path: directory path, relative to git root

    Returns:
        list of names

    Raises:
        subprocess.CalledProcessError: if `path` doesn't exists in `rev`
    """
    tree_spec = '%s:%s' % (rev, path)
    output = util.check_output(
        'git',
        'ls-tree',
        '--name-only',
        tree_spec,
        cwd=git_repo,
        log_stdout=False)
    return output.splitlines()
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800376
377
def get_rev_by_time(git_repo, timestamp, branch, path=None):
    """Find the latest commit before given time.

    Only the first-parent history of `branch` is searched.

    Args:
        git_repo: path of git repo.
        timestamp: timestamp
        branch: only query parent of the `branch`. If branch=None, it means
            'HEAD' (current branch, usually).
        path: only query history of path, relative to git_repo

    Returns:
        git commit hash. None if path didn't exist at the given time.
    """
    cmd = [
        'git',
        'rev-list',
        '--first-parent',
        '-1',
        '--before',
        str(timestamp),
        branch or 'HEAD',
    ]
    if path:
        cmd.extend(['--', path])

    git_rev = util.check_output(*cmd, cwd=git_repo).strip()
    if not git_rev:
        return None
    return git_rev
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800408
409
def reset_hard(git_repo):
    """Restore modified and deleted files.

    This is simply wrapper of "git reset --hard".

    Args:
        git_repo: path of git repo.
    """
    cmd = ['git', 'reset', '--hard']
    util.check_call(*cmd, cwd=git_repo)
419
420
def list_untracked(git_repo, excludes=None):
    """List untracked files and directories.

    The list comes from 'git ls-files --others --exclude-standard'.

    Args:
        git_repo: path of git repo.
        excludes: files and/or directories to ignore, relative to git_repo

    Returns:
        list of paths, relative to git_repo
    """
    exclude_flags = []
    for exclude in excludes or []:
        assert not os.path.isabs(exclude), 'should be relative'
        # The leading '/' is prepended so the pattern starts at git_repo.
        exclude_flags.extend(['--exclude', '/' + re.escape(exclude)])

    output = util.check_output(
        'git',
        'ls-files',
        '--others',
        '--exclude-standard',
        *exclude_flags,
        cwd=git_repo)
    # Remove the trailing slash, which means directory.
    return [line.rstrip('/') for line in output.splitlines()]
449
450
def distclean(git_repo, excludes=None):
    """Clean up git repo directory.

    Restore modified and deleted files. Delete untracked files.

    Args:
        git_repo: path of git repo.
        excludes: files and/or directories to ignore, relative to git_repo
    """
    reset_hard(git_repo)

    # Delete untracked files.
    for untracked in list_untracked(git_repo, excludes=excludes):
        path = os.path.join(git_repo, untracked)
        logger.debug('delete untracked: %s', path)
        # os.path.isdir() follows symlinks but shutil.rmtree() refuses to work
        # on a symlink. For an untracked symlink to a directory, remove the
        # link itself and leave its target alone.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.unlink(path)
470
471
def get_history(git_repo,
                path,
                branch=None,
                after=None,
                before=None,
                padding=False,
                with_subject=False):
    """Get first-parent commit history of given path.

    `after` and `before` could be outside of lifetime of `path`. `padding` is
    used to control what to return for such cases.

    Args:
        git_repo: path of git repo.
        path: path to query, relative to git_repo
        branch: branch name or ref name
        after: limit history after given time (inclusive)
        before: limit history before given time (inclusive)
        padding: If True, pads returned result with dummy record at exact
            'after' and 'before' time, if 'path' existed at that time.
            Otherwise, only returns real commits.
        with_subject: If True, return commit subject together

    Returns:
        List of (timestamp, git hash, subject); or (timestamp, git hash)
        depends on with_subject flag. They are all events when `path` was
        added, removed, modified, and start and end time if `padding` is true.
        If `padding` and `with_subject` are both true, 'dummy subject' will be
        returned as padding history's subject.

        For each pair, at `timestamp`, the repo state is `git hash`. In other
        words, `timestamp` is not necessary the commit time of `git hash` for
        the padded entries.
    """
    if with_subject:
        log_format = '%ct %H %s'
    else:
        log_format = '%ct %H'
    cmd = [
        'git', 'log', '--reverse', '--first-parent', '--format=' + log_format
    ]
    if after:
        cmd.extend(['--after', str(after)])
    if before:
        cmd.extend(['--before', str(before)])
    if branch:
        assert not is_git_rev(branch)
        cmd.append(branch)
    # '--' is necessary otherwise if `path` is removed in current revision,
    # git will complain it's an ambiguous argument which may be path or
    # something else (like git branch name, tag name, etc.)
    cmd.extend(['--', path])

    result = []
    for line in util.check_output(*cmd, cwd=git_repo).splitlines():
        # fields = [timestamp, git_rev, subject] or [timestamp, git_rev]
        fields = line.split(' ', 2)
        result.append((int(fields[0]),) + tuple(fields[1:]))

    if not padding:
        return result

    assert before or after, 'padding=True make no sense if they are both None'
    # Extra fields appended to padded entries to match the real entries.
    padding_tail = ('dummy subject',) if with_subject else ()

    # Pad the end: only if `path` existed at `before` and the last real entry
    # is not exactly at that time.
    if before is not None and get_rev_by_time(
            git_repo, before, branch, path=path):
        before = int(before)
        if not result or result[-1][0] != before:
            git_rev = get_rev_by_time(git_repo, before, branch)
            assert git_rev
            result.append((before, git_rev) + padding_tail)

    # Pad the beginning likewise.
    if after is not None and get_rev_by_time(
            git_repo, after, branch, path=path):
        after = int(after)
        if not result or result[0][0] != after:
            git_rev = get_rev_by_time(git_repo, after, branch)
            assert git_rev
            result.insert(0, (after, git_rev) + padding_tail)

    return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800551
552
def get_history_recursively(git_repo, path, after, before, parser_callback):
    """Get commit history of given path and its dependencies.

    In comparison to get_history(), get_history_recursively also takes
    dependencies into consideration. For example, if file A referenced file B,
    get_history_recursively(A) will return commits of B in addition to A. This
    applies recursively, so commits of C will be included if file B referenced
    file C, and so on.

    This function is file type neutral. `parser_callback(filename, content)`
    will be invoked to parse file content and should return list of filename
    of dependencies. If `parser_callback` returns None (usually syntax error),
    the commit is omitted.

    Args:
        git_repo: path of git repo
        path: path to query, relative to git_repo
        after: limit history after given time (inclusive)
        before: limit history before given time (inclusive)
        parser_callback: callback to parse file content. See above comment.

    Returns:
        list of (commit timestamp, git hash), sorted by timestamp
    """
    history = get_history(
        git_repo, path, after=after, before=before, padding=True)

    # Collect include information of each commit: for each dependency, the
    # set of revisions of `path` which reference it.
    includes = {}
    for _, git_rev in history:
        content = get_file_from_revision(git_repo, git_rev, path)
        parse_result = parser_callback(path, content)
        if parse_result is None:
            continue
        for include_name in parse_result:
            includes.setdefault(include_name, set()).add(git_rev)

    # Analyze the start time and end time of each include. One include may
    # have several intervals if it was dropped and referenced again later.
    dependencies = []
    for include, referencing_revs in includes.items():
        appeared = None
        for commit_time, git_rev in history:
            if git_rev in referencing_revs:
                if not appeared:
                    appeared = commit_time
            elif appeared:
                dependencies.append((include, appeared, commit_time))
                appeared = None

        # Still referenced at the end of history; the interval ends at
        # `before`.
        if appeared is not None:
            dependencies.append((include, appeared, before))

    # Recursion and merge.
    result = list(history)
    for include, appeared, disappeared in dependencies:
        result.extend(
            get_history_recursively(git_repo, include, appeared, disappeared,
                                    parser_callback))

    # Sort (by timestamp only) and drop entries identical to the previous one.
    result.sort(key=lambda x: x[0])
    deduped = []
    for entry in result:
        if deduped and deduped[-1] == entry:
            continue
        deduped.append(entry)

    return deduped
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800622
623
def list_commits_between_commits(git_repo, old, new):
    """Get all first-parent commits between (old, new].

    Args:
        git_repo: path of git repo.
        old: old commit hash (exclusive)
        new: new commit hash (inclusive)

    Returns:
        list of [timestamp, rev] pairs, oldest first. The timestamps may have
        been adjusted to be non-decreasing; see the comment in the code.
    """
    assert old and new
    assert old == new or is_ancestor_commit(git_repo, old, new)

    # --first-parent is necessary for Android, see following link for more
    # discussion.
    # https://docs.google.com/document/d/1c8qiq14_ObRRjLT62sk9r5V5cyCGHX66dLYab4MVnks/edit#heading=h.n3i6mt2n6xuu
    output = util.check_output(
        'git',
        'rev-list',
        '--timestamp',
        '--reverse',
        '--first-parent',
        '%s..%s' % (old, new),
        cwd=git_repo)
    commits = []
    for line in output.splitlines():
        timestamp, git_rev = line.split()
        commits.append([int(timestamp), git_rev])

    # bisect-kit has a fundamental assumption that commit timestamps are
    # increasing because we sort and bisect the commits by timestamp across
    # git repos. If not increasing, we have to adjust the timestamp as
    # workaround. This might lead to bad bisect result, however the bad
    # probability is low in practice since most machines' clocks are good
    # enough.
    timestamps = [commit[0] for commit in commits]
    if timestamps != sorted(timestamps):
        logger.warning('Commit timestamps are not increasing')
        adjusted = 0
        latest = -1
        for commit in commits:
            # Raise any timestamp going backwards to the latest one seen.
            if commit[0] < latest:
                commit[0] = latest
                adjusted += 1
            latest = commit[0]
        logger.warning('%d timestamps adjusted', adjusted)

    return commits