# -*- coding: utf-8 -*-
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Git utility."""
6
from __future__ import print_function
import logging
import os
import re
import shutil
import stat
import subprocess
import time

from bisect_kit import cli
from bisect_kit import util
18
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
20
GIT_FULL_COMMIT_ID_LENGTH = 40

# Minimal acceptable length of git commit id.
#
# For chromium, hash collision rate over number of digits:
# - 6 digits: 4.85%
# - 7 digits: 0.32%
# - 8 digits: 0.01%
# As foolproof check, 7 digits should be enough.
GIT_MIN_COMMIT_ID_LENGTH = 7


def is_git_rev(s):
  """Is a git hash-like version string.

  It accepts shortened hash with at least GIT_MIN_COMMIT_ID_LENGTH digits and
  at most GIT_FULL_COMMIT_ID_LENGTH digits. Only lowercase hexadecimal digits
  are accepted.

  Args:
    s: string to check.

  Returns:
    True if `s` looks like a (possibly shortened) git commit id.
  """
  if not GIT_MIN_COMMIT_ID_LENGTH <= len(s) <= GIT_FULL_COMMIT_ID_LENGTH:
    return False
  # Anchor with \Z instead of $: '$' also matches right before a trailing
  # newline, which would let strings like '1a2b3c4\n' pass as a valid hash.
  return bool(re.match(r'[0-9a-f]+\Z', s))
41
42
def argtype_git_rev(s):
  """Validates that `s` is a git hash-like string.

  Args:
    s: string to validate.

  Returns:
    `s` unchanged when it passes is_git_rev().

  Raises:
    cli.ArgTypeError: if `s` is not accepted by is_git_rev().
  """
  if is_git_rev(s):
    return s
  msg = 'should be git hash, at least %d digits' % GIT_MIN_COMMIT_ID_LENGTH
  raise cli.ArgTypeError(msg, '1a2b3c4d5e')
49
50
def is_git_root(path):
  """Is given path root of git repo.

  The check only looks for an entry named '.git' directly under `path`.

  Args:
    path: directory to examine.

  Returns:
    True if `path`/.git exists.
  """
  dot_git = os.path.join(path, '.git')
  return os.path.exists(dot_git)
54
55
def is_git_bare_dir(path):
  """Is inside .git folder or bare git checkout.

  Args:
    path: directory to examine.

  Returns:
    True if `path` is a directory and 'git rev-parse --is-bare-repository'
    run from it prints 'true'. False if `path` is not a directory or the git
    command fails.
  """
  if not os.path.isdir(path):
    return False
  try:
    answer = util.check_output(
        'git', 'rev-parse', '--is-bare-repository', cwd=path)
  except subprocess.CalledProcessError:
    return False
  return answer == 'true\n'
65
66
def clone(git_repo, repo_url, reference=None):
  """git clone.

  git_repo and its parent directories will be created if they don't exist.

  Args:
    git_repo: path of the local directory to clone into.
    repo_url: url passed to 'git clone'.
    reference: if given, passed to 'git clone' as the '--reference' argument.
  """
  if not os.path.exists(git_repo):
    os.makedirs(git_repo)
  clone_cmd = ['git', 'clone', repo_url, '.']
  if reference:
    clone_cmd.extend(['--reference', reference])
  util.check_call(*clone_cmd, cwd=git_repo)
74
75
def checkout_version(git_repo, rev):
  """Runs 'git checkout -q -f' for the given revision.

  Args:
    git_repo: path of git repo.
    rev: git commit revision to checkout.
  """
  checkout_cmd = ('git', 'checkout', '-q', '-f', rev)
  util.check_call(*checkout_cmd, cwd=git_repo)
84
85
def init(git_repo):
  """Runs 'git init -q' inside `git_repo`.

  git_repo and its parent directories will be created if they don't exist.

  Args:
    git_repo: path of git repo.
  """
  repo_missing = not os.path.exists(git_repo)
  if repo_missing:
    os.makedirs(git_repo)

  init_cmd = ('git', 'init', '-q')
  util.check_call(*init_cmd, cwd=git_repo)
98
99
def commit_file(git_repo,
                path,
                message,
                content,
                commit_time=None,
                author_time=None):
  """Writes `content` to a file and commits it.

  Missing parent directories of the file are created first.

  Args:
    git_repo: path of git repo
    path: file path, relative to git_repo
    message: commit message
    content: file content
    commit_time: commit timestamp; exported as GIT_COMMITTER_DATE if truthy
    author_time: author timestamp; exported as GIT_AUTHOR_DATE if truthy.
        Defaults to `commit_time` when None.
  """
  if author_time is None:
    author_time = commit_time

  # Only export the date variables which were actually specified.
  env = {}
  date_vars = (
      ('GIT_AUTHOR_DATE', author_time),
      ('GIT_COMMITTER_DATE', commit_time),
  )
  for var_name, timestamp in date_vars:
    if timestamp:
      env[var_name] = str(timestamp)

  full_path = os.path.join(git_repo, path)
  parent_dir = os.path.dirname(full_path)
  if not os.path.exists(parent_dir):
    os.makedirs(parent_dir)
  with open(full_path, 'w') as output_file:
    output_file.write(content)

  util.check_call('git', 'add', path, cwd=git_repo)
  commit_cmd = ('git', 'commit', '-q', '-m', message, path)
  util.check_call(*commit_cmd, cwd=git_repo, env=env)
135
136
def config(git_repo, *args):
  """Runs 'git config' with the given arguments.

  Args:
    git_repo: path of git repo.
    args: parameters pass to 'git config'
  """
  config_cmd = ('git', 'config') + args
  util.check_call(*config_cmd, cwd=git_repo)
145
146
def fetch(git_repo, *args):
  """Wrapper of 'git fetch' with retry support.

  A failed fetch is retried only if its stderr contains
  'The requested URL returned error: 5' (i.e. a 5xx server error). At most 5
  attempts are made; the waits between attempts are 20, 40, 60 and 60 seconds.

  Args:
    git_repo: path of git repo.
    args: parameters pass to 'git fetch'

  Raises:
    subprocess.CalledProcessError: the fetch failed with a non-retryable
        error, or still failed at the 5th attempt.
  """
  max_tries = 5
  for attempt in range(1, max_tries + 1):
    captured_stderr = []
    try:
      util.check_call(
          'git',
          'fetch',
          *args,
          cwd=git_repo,
          stderr_callback=captured_stderr.append)
    except subprocess.CalledProcessError:
      if attempt >= max_tries:
        logger.error('git fetch failed too much times')
        raise
      # only retry 5xx internal server error
      if 'The requested URL returned error: 5' not in ''.join(captured_stderr):
        raise
      delay = min(60, 10 * 2**attempt)
      logger.warning('git fetch failed, will retry %s seconds later', delay)
      time.sleep(delay)
    else:
      return
Kuang-che Wu1e49f512018-12-06 15:27:42 +0800178
179
def is_containing_commit(git_repo, rev):
  """Determines given commit exists.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    True if 'git cat-file -t' reports `rev` as a commit inside given git repo.
    If the git command fails or cannot be launched (e.g. git_repo is not a git
    folder), returns False as well.
  """
  try:
    object_type = util.check_output(
        'git', 'cat-file', '-t', rev, cwd=git_repo)
  except (subprocess.CalledProcessError, OSError):
    return False
  return object_type == 'commit\n'
Kuang-che Wue41e0062017-09-01 19:04:14 +0800198
199
def is_ancestor_commit(git_repo, old, new):
  """Determines `old` commit is ancestor of `new` commit.

  Implemented by checking whether 'git rev-list --ancestry-path -1 old..new'
  prints anything.

  Args:
    git_repo: path of git repo.
    old: the ancestor commit.
    new: the descendant commit.

  Returns:
    True only if `old` is the ancestor of `new`. One commit is not considered
    as ancestor of itself.
  """
  rev_range = '%s..%s' % (old, new)
  output = util.check_output(
      'git', 'rev-list', '--ancestry-path', '-1', rev_range, cwd=git_repo)
  return output != ''
219
220
def get_commit_metadata(git_repo, rev):
  """Get metadata of given commit.

  The metadata is parsed from the output of 'git cat-file -p'.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    dict of metadata, including (if available):
      tree: hash of git tree object
      parent: list of parent commits (more than one for merge commits); this
          field is unavailable for the very first commit of git repo.
      author: name and email of author
      author_time: author timestamp (without timezone information)
      committer: name and email of committer
      committer_time: commit timestamp (without timezone information)
      message: commit message text
  """
  meta = {}
  data = util.check_output(
      'git', 'cat-file', '-p', rev, cwd=git_repo, log_stdout=False)
  # Header and message are separated by the first blank line. partition()
  # yields an empty message instead of raising if the separator is missing.
  header, _, meta['message'] = data.partition('\n\n')
  for line in header.splitlines():
    m = re.match(r'^tree (\w+)', line)
    if m:
      meta['tree'] = m.group(1)
      continue

    m = re.match(r'^parent (\w+)', line)
    if m:
      # Each parent is listed on its own 'parent' line, so accumulate them
      # instead of overwriting; otherwise only the last parent of a merge
      # commit would be reported.
      meta.setdefault('parent', []).append(m.group(1))
      continue

    m = re.match(r'^(author|committer) (.*) (\d+) (\S+)$', line)
    if m:
      meta[m.group(1)] = m.group(2)
      meta['%s_time' % m.group(1)] = int(m.group(3))
      continue
  return meta
260
261
def get_revlist(git_repo, old, new):
  """Enumerates git commit between two revisions (inclusive).

  Implemented as 'git rev-list --reverse old^..new'.

  Args:
    git_repo: path of git repo.
    old: git commit revision.
    new: git commit revision.

  Returns:
    list of git revisions. The list contains the input revisions, old and new.
  """
  assert old
  assert new
  rev_range = '%s^..%s' % (old, new)
  output = util.check_output(
      'git', 'rev-list', '--reverse', rev_range, cwd=git_repo)
  return output.splitlines()
Kuang-che Wue2563ea2018-01-05 20:30:28 +0800278
279
def get_commit_log(git_repo, rev):
  """Get git commit log.

  Args:
    git_repo: path of git repo.
    rev: git commit revision.

  Returns:
    commit log message, as printed by 'git log -1 --format=%B'
  """
  return util.check_output(
      'git', 'log', '-1', '--format=%B', rev, cwd=git_repo)
293
294
def get_commit_hash(git_repo, rev):
  """Get git commit hash.

  Args:
    git_repo: path of git repo.
    rev: could be git tag, branch, or (shortened) commit hash

  Returns:
    full git commit hash

  Raises:
    ValueError: `rev` is not unique or doesn't exist
  """
  # '^{commit}' restricts the search to commits only.
  # The trailing '--' avoids ambiguity, like matching rev against path name.
  rev_spec = '%s^{commit}' % rev
  try:
    output = util.check_output(
        'git', 'rev-parse', rev_spec, '--', cwd=git_repo)
  except subprocess.CalledProcessError:
    # Do not use 'git rev-parse --disambiguate' to determine uniqueness
    # because it searches objects other than commits as well.
    raise ValueError('%s is not unique or does not exist' % rev)

  # Drop the trailing newlines and the echoed '--' separator.
  git_rev = output.rstrip('-\n')
  assert is_git_rev(git_rev)
  return git_rev
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800320
321
def get_commit_time(git_repo, rev, path=None):
  """Get git commit timestamp.

  Implemented as 'git log -1 --format=%ct rev [-- path]'.

  Args:
    git_repo: path of git repo
    rev: git commit id, branch name, tag name, or other git object
    path: path, relative to git_repo

  Returns:
    timestamp (int)
  """
  cmd = ['git', 'log', '-1', '--format=%ct', rev]
  if path:
    cmd.extend(['--', path])
  return int(util.check_output(*cmd, cwd=git_repo))
338
339
def is_symbolic_link(git_repo, rev, path):
  """Check if a file is symbolic link.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: file path

  Returns:
    True if the specified file is a symbolic link in repo.

  Raises:
    ValueError if not found
  """
  # Record format with -z:
  #   120000 blob 8735a8c1dd96ede39a21d983d5c96792fd15c1a5<TAB>default.xml<NUL>
  # -z makes git emit file names verbatim (no quoting) and NUL-terminated, so
  # names containing spaces or other special characters compare correctly.
  output = util.check_output(
      'git', 'ls-tree', '-z', rev, '--full-name', path, cwd=git_repo)
  for record in output.split('\0'):
    info, tab, name = record.partition('\t')
    if not tab or name != path:
      continue
    fields = info.split()
    if fields:
      # The first field is the file mode in octal.
      return stat.S_ISLNK(int(fields[0], 8))

  raise ValueError('file %s is not found in repo:%s rev:%s' %
                   (path, git_repo, rev))
Zheng-Jie Chang1ace3012020-02-15 04:51:05 +0800363
364
def get_file_from_revision(git_repo, rev, path):
  """Get file content of given revision.

  If `path` is a symbolic link in `rev`, the link is followed (recursively)
  and the content of the link target is returned instead.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: file path

  Returns:
    file content (str)
  """
  result = util.check_output(
      'git', 'show', '%s:%s' % (rev, path), cwd=git_repo, log_stdout=False)

  # It might be a symbolic link; for a link, `result` is the link target.
  # In extreme case, it's possible that filenames contain special characters,
  # like newlines. In practice, it should be safe to assume no such cases and
  # reduce disk i/o.
  if '\n' not in result and is_symbolic_link(git_repo, rev, path):
    # A link target is relative to the directory containing the link, not to
    # the root of the repo, so resolve it against dirname(path).
    target = os.path.normpath(os.path.join(os.path.dirname(path), result))
    return get_file_from_revision(git_repo, rev, target)

  return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800387
388
def list_dir_from_revision(git_repo, rev, path):
  """Lists entries of directory of given revision.

  Implemented as 'git ls-tree --name-only rev:path'.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: directory path, relative to git root

  Returns:
    list of names

  Raises:
    subprocess.CalledProcessError: if `path` doesn't exists in `rev`
  """
  tree_spec = '%s:%s' % (rev, path)
  listing = util.check_output(
      'git', 'ls-tree', '--name-only', tree_spec, cwd=git_repo,
      log_stdout=False)
  return listing.splitlines()
Kuang-che Wue4bae0b2018-07-19 12:10:14 +0800410
411
def get_rev_by_time(git_repo, timestamp, branch, path=None):
  """Query commit of given time.

  Implemented as
  'git rev-list --first-parent -1 --before timestamp branch [-- path]'.

  Args:
    git_repo: path of git repo.
    timestamp: timestamp
    branch: only query parent of the `branch`. If branch=None, it means 'HEAD'
        (current branch, usually).
    path: only query history of path, relative to git_repo

  Returns:
    git commit hash. None if path didn't exist at the given time.
  """
  cmd = [
      'git',
      'rev-list',
      '--first-parent',
      '-1',
      '--before',
      str(timestamp),
      branch or 'HEAD',
  ]
  if path:
    cmd.extend(['--', path])

  git_rev = util.check_output(*cmd, cwd=git_repo).strip()
  if not git_rev:
    return None
  return git_rev
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800442
443
def reset_hard(git_repo):
  """Restore modified and deleted files.

  This is simply wrapper of "git reset --hard".

  Args:
    git_repo: path of git repo.
  """
  reset_cmd = ('git', 'reset', '--hard')
  util.check_call(*reset_cmd, cwd=git_repo)
453
454
def list_untracked(git_repo, excludes=None):
  """List untracked files and directories.

  Implemented as 'git ls-files --others --exclude-standard', with one
  additional '--exclude' pattern per entry of `excludes`.

  Args:
    git_repo: path of git repo.
    excludes: files and/or directories to ignore, relative to git_repo

  Returns:
    list of paths, relative to git_repo
  """
  exclude_flags = []
  for exclude in excludes or []:
    assert not os.path.isabs(exclude), 'should be relative'
    exclude_flags.extend(['--exclude', '/' + re.escape(exclude)])

  listing = util.check_output(
      'git',
      'ls-files',
      '--others',
      '--exclude-standard',
      *exclude_flags,
      cwd=git_repo)
  # Remove the trailing slash, which means directory.
  return [entry.rstrip('/') for entry in listing.splitlines()]
483
484
def distclean(git_repo, excludes=None):
  """Clean up git repo directory.

  Restore modified and deleted files. Delete untracked files.

  Args:
    git_repo: path of git repo.
    excludes: files and/or directories to ignore, relative to git_repo
  """
  reset_hard(git_repo)

  # Delete untracked files.
  for untracked in list_untracked(git_repo, excludes=excludes):
    path = os.path.join(git_repo, untracked)
    logger.debug('delete untracked: %s', path)
    # A symbolic link is removed itself, even if it points to a directory;
    # only real directories are removed recursively.
    is_real_dir = not os.path.islink(path) and os.path.isdir(path)
    if is_real_dir:
      shutil.rmtree(path)
    else:
      os.unlink(path)
506
507
def get_history(git_repo,
                path=None,
                branch=None,
                after=None,
                before=None,
                padding_begin=False,
                padding_end=False,
                with_subject=False):
  """Get commit history of given path.

  `after` and `before` could be outside of lifetime of `path`. `padding_begin`
  and `padding_end` are used to control what to return for such cases.

  Args:
    git_repo: path of git repo.
    path: path to query, relative to git_repo
    branch: branch name or ref name
    after: limit history after given time (inclusive)
    before: limit history before given time (inclusive)
    padding_begin: If True, pads returned result with dummy record at exact
        'after' time, if 'path' existed at that time.
    padding_end: If True, pads returned result with dummy record at exact
        'before' time, if 'path' existed at that time.
    with_subject: If True, return commit subject together

  Returns:
    List of (timestamp, git hash, subject); or (timestamp, git hash) depends
    on with_subject flag. They are all events when `path` was added, removed,
    modified, and start and end time if the corresponding padding flag is
    true. If padding and `with_subject` are both requested, 'dummy subject'
    will be returned as padding history's subject.

    For each pair, at `timestamp`, the repo state is `git hash`. In other
    words, `timestamp` is not necessary the commit time of `git hash` for the
    padded entries.
  """
  log_format = '%ct %H %s' if with_subject else '%ct %H'
  cmd = ['git', 'log', '--reverse', '--first-parent', '--format=' + log_format]
  if after:
    cmd.extend(['--after', str(after)])
  if before:
    cmd.extend(['--before', str(before)])
  if branch:
    assert not is_git_rev(branch)
    cmd.append(branch)
  if path:
    # '--' is necessary otherwise if `path` is removed in current revision, git
    # will complain it's an ambiguous argument which may be path or something
    # else (like git branch name, tag name, etc.)
    cmd.extend(['--', path])

  result = []
  for line in util.check_output(*cmd, cwd=git_repo).splitlines():
    # fields = [timestamp, git_rev, subject] or [timestamp, git_rev]
    fields = line.split(' ', 2)
    result.append((int(fields[0]),) + tuple(fields[1:]))

  # Trailing part of a padded record, so it has the same arity as real ones.
  pad_tail = ('dummy subject',) if with_subject else ()

  if padding_end:
    assert before, 'padding_end=True make no sense if before=None'
    if get_rev_by_time(git_repo, before, branch, path=path):
      before = int(before)
      if not result or result[-1][0] != before:
        git_rev = get_rev_by_time(git_repo, before, branch)
        assert git_rev
        result.append((before, git_rev) + pad_tail)

  if padding_begin:
    assert after, 'padding_begin=True make no sense if after=None'
    if get_rev_by_time(git_repo, after, branch, path=path):
      after = int(after)
      if not result or result[0][0] != after:
        git_rev = get_rev_by_time(git_repo, after, branch)
        assert git_rev
        result.insert(0, (after, git_rev) + pad_tail)

  return result
Kuang-che Wu3eb6b502018-06-06 16:15:18 +0800592
593
def get_history_recursively(git_repo,
                            path,
                            after,
                            before,
                            parser_callback,
                            padding_end=True,
                            branch=None):
  """Get commit history of given path and its dependencies.

  In comparison to get_history(), get_history_recursively also takes
  dependencies into consideration. For example, if file A referenced file B,
  get_history_recursively(A) will return commits of B in addition to A. This
  applies recursively, so commits of C will be included if file B referenced
  file C, and so on.

  This function is file type neutral. `parser_callback(filename, content)` will
  be invoked to parse file content and should return list of filename of
  dependencies. If `parser_callback` returns None (usually syntax error), the
  commit is omitted.

  Args:
    git_repo: path of git repo
    path: path to query, relative to git_repo
    after: limit history after given time (inclusive)
    before: limit history before given time (inclusive)
    parser_callback: callback to parse file content. See above comment.
    padding_end: If True, pads returned result with dummy record at exact
        'before' time, reusing the git hash of the latest record. No padding
        is added if there is no history at all.
    branch: branch name or ref name

  Returns:
    list of (commit timestamp, git hash)
  """
  history = get_history(
      git_repo,
      path,
      after=after,
      before=before,
      padding_begin=True,
      branch=branch)

  # Collect include information of each commit.
  includes = {}
  for _, git_rev in history:
    content = get_file_from_revision(git_repo, git_rev, path)
    parse_result = parser_callback(path, content)
    if parse_result is None:
      continue
    for include_name in parse_result:
      includes.setdefault(include_name, set()).add(git_rev)

  # Analyze the start time and end time of each include.
  dependencies = []
  for include, including_revs in includes.items():
    appeared = None
    for commit_time, git_rev in history:
      if git_rev in including_revs:
        if not appeared:
          appeared = commit_time
      else:
        if appeared:
          # dependency file exists in time range [appeared, commit_time)
          dependencies.append((include, appeared, commit_time - 1))
        appeared = None

    if appeared is not None:
      dependencies.append((include, appeared, before))

  # Recursion and merge.
  result = list(history)
  for include, appeared, disappeared in dependencies:
    result += get_history_recursively(
        git_repo,
        include,
        appeared,
        disappeared,
        parser_callback,
        padding_end=False,
        branch=branch)

  # Sort and padding.
  result.sort(key=lambda x: x[0])
  # Nothing to pad with if `path` has no history in the given time range;
  # indexing result[-1] would raise IndexError in that case.
  if padding_end and result:
    pad = (before,)
    pad += result[-1][1:]
    result.append(pad)

  # Dedup (only adjacent identical records).
  result2 = []
  for x in result:
    if result2 and result2[-1] == x:
      continue
    result2.append(x)

  return result2
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800691
692
def get_branches(git_repo, all_branches=True, commit=None):
  """Get branches of a repository.

  Implemented as 'git branch --format=%(refname)', optionally with '-a'
  and/or '--contains commit'.

  Args:
    git_repo: path of git repo
    all_branches: return remote branches if is set to True
    commit: return branches containing this commit if is not None

  Returns:
    list of branch names
  """
  cmd = ['git', 'branch', '--format=%(refname)']
  if all_branches:
    cmd.append('-a')
  if commit:
    cmd.extend(['--contains', commit])

  output = util.check_output(*cmd, cwd=git_repo)
  return [line.strip() for line in output.splitlines()]
714
715
def list_commits_between_commits(git_repo, old, new):
  """Get all commits between (old, new].

  Timestamps which go backwards along the history are raised to the previous
  commit's timestamp, so the returned timestamps never decrease.

  Args:
    git_repo: path of git repo.
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of [timestamp, rev] (each entry is a two-element list)
  """
  assert old and new
  if old == new:
    return []

  assert is_ancestor_commit(git_repo, old, new)
  # --first-parent is necessary for Android, see following link for more
  # discussion.
  # https://docs.google.com/document/d/1c8qiq14_ObRRjLT62sk9r5V5cyCGHX66dLYab4MVnks/edit#heading=h.n3i6mt2n6xuu
  output = util.check_output(
      'git',
      'rev-list',
      '--timestamp',
      '--reverse',
      '--first-parent',
      '%s..%s' % (old, new),
      cwd=git_repo)
  commits = []
  for row in output.splitlines():
    timestamp, git_rev = row.split()
    commits.append([int(timestamp), git_rev])

  # bisect-kit has a fundamental assumption that commit timestamps are
  # increasing because we sort and bisect the commits by timestamp across git
  # repos. If not increasing, we have to adjust the timestamp as workaround.
  # This might lead to bad bisect result, however the bad probability is low in
  # practice since most machines' clocks are good enough.
  timestamps = [commit[0] for commit in commits]
  if timestamps != sorted(timestamps):
    logger.warning('Commit timestamps are not increasing')
    previous = -1
    adjusted = 0
    for commit in commits:
      if commit[0] < previous:
        commit[0] = previous
        adjusted += 1
      previous = commit[0]
    logger.warning('%d timestamps adjusted', adjusted)

  return commits