# -*- coding: utf-8 -*-
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Git utility."""
6
7from __future__ import print_function
8import logging
Kuang-che Wubfc4a642018-04-19 11:54:08 +08009import os
Kuang-che Wue41e0062017-09-01 19:04:14 +080010import re
11import subprocess
12
13from bisect_kit import cli
14from bisect_kit import util
15
logger = logging.getLogger(__name__)

# Length of a full git commit id, in hex digits.
GIT_FULL_COMMIT_ID_LENGTH = 40

# Shortest abbreviated commit id accepted by this module.
#
# Hash collision rate for chromium, by number of digits:
# - 6 digits: 4.85%
# - 7 digits: 0.32%
# - 8 digits: 0.01%
# Requiring 7 digits is enough for a foolproof check.
GIT_MIN_COMMIT_ID_LENGTH = 7
28
29
def is_git_rev(s):
  """Is a git hash-like version string.

  It accepts shortened hash with at least GIT_MIN_COMMIT_ID_LENGTH digits and
  at most GIT_FULL_COMMIT_ID_LENGTH digits.

  Args:
    s: string to check.

  Returns:
    True if `s` has an acceptable length and consists solely of lowercase hex
    digits; False otherwise.
  """
  if not GIT_MIN_COMMIT_ID_LENGTH <= len(s) <= GIT_FULL_COMMIT_ID_LENGTH:
    return False
  # Anchor with \Z rather than '$': '$' also matches right before a trailing
  # newline, which would let strings like '1a2b3c4\n' pass as a hash.
  return bool(re.match(r'[0-9a-f]+\Z', s))
38
39
def argtype_git_rev(s):
  """Argument type checker for git hashes.

  Args:
    s: command line argument value.

  Returns:
    `s` unchanged if it looks like a git hash.

  Raises:
    cli.ArgTypeError: if `s` is not accepted by is_git_rev().
  """
  if is_git_rev(s):
    return s
  raise cli.ArgTypeError(
      'should be git hash, at least %d digits' % GIT_MIN_COMMIT_ID_LENGTH,
      '1a2b3c4d5e')
46
47
def is_git_root(path):
  """Tells whether `path` is the root of a git repo.

  Args:
    path: directory to inspect.

  Returns:
    True if an entry named '.git' exists directly under `path`.
  """
  dot_git = os.path.join(path, '.git')
  return os.path.exists(dot_git)
51
52
def checkout_version(git_repo, rev):
  """Checks out the given revision.

  Runs `git checkout -q -f <rev>` inside `git_repo`.

  Args:
    git_repo: path of git repo.
    rev: git commit revision to checkout.
  """
  cmd = ['git', 'checkout', '-q', '-f', rev]
  util.check_call(*cmd, cwd=git_repo)
61
62
def is_containing_commit(git_repo, rev):
  """Tells whether the given commit exists in the repo.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    True if `git cat-file -t` reports `rev` as a commit inside `git_repo`.
    False if the git command fails or cannot be launched, which includes the
    case that git_repo is not a git folder.
  """
  cmd = ['git', 'cat-file', '-t', rev]
  try:
    object_type = util.check_output(*cmd, cwd=git_repo)
  except (subprocess.CalledProcessError, OSError):
    # Either git rejected the query or the command could not be run at all
    # in `git_repo`.
    return False
  return object_type == 'commit\n'
Kuang-che Wue41e0062017-09-01 19:04:14 +080081
82
def is_ancestor_commit(git_repo, old, new):
  """Tells whether `old` commit is an ancestor of `new` commit.

  Args:
    git_repo: path of git repo.
    old: the ancestor commit.
    new: the descendant commit.

  Returns:
    True only if `old` is the ancestor of `new`. One commit is not considered
    as ancestor of itself.
  """
  rev_range = '%s..%s' % (old, new)
  # Ask for at most one commit on the ancestry path; any output at all means
  # such a path exists.
  output = util.check_output(
      'git', 'rev-list', '--ancestry-path', '-1', rev_range, cwd=git_repo)
  return output != ''
102
103
def get_revlist(git_repo, old, new):
  """Lists git commits between two revisions (inclusive).

  Args:
    git_repo: path of git repo.
    old: git commit revision.
    new: git commit revision.

  Returns:
    list of git revisions, oldest first. The list contains the input
    revisions, old and new.
  """
  assert old
  assert new
  # Start the range from the parent of `old` so that `old` itself is listed.
  rev_range = '%s^..%s' % (old, new)
  output = util.check_output(
      'git', 'rev-list', '--reverse', rev_range, cwd=git_repo)
  return output.splitlines()
Kuang-che Wue2563ea2018-01-05 20:30:28 +0800120
121
def get_commit_log(git_repo, rev):
  """Gets the log message of a git commit.

  Args:
    git_repo: path of git repo.
    rev: git commit revision.

  Returns:
    commit log message, as printed by `git log -1 --format=%B`.
  """
  return util.check_output(
      'git', 'log', '-1', '--format=%B', rev, cwd=git_repo)
135
136
def get_commit_hash(git_repo, rev):
  """Get git commit hash.

  Args:
    git_repo: path of git repo.
    rev: could be git tag, branch, or (shortened) commit hash

  Returns:
    full git commit hash
  """
  # '^{commit}' peels `rev` down to a commit object. Without it,
  # 'git rev-parse <annotated tag>' prints the id of the tag object rather
  # than the commit the tag points to.
  cmd = ['git', 'rev-parse', '%s^{commit}' % rev]
  git_rev = util.check_output(*cmd, cwd=git_repo).strip()
  assert git_rev
  return git_rev
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800151
152
def get_commit_time(git_repo, rev):
  """Gets the commit timestamp of a git commit.

  Args:
    git_repo: path of git repo
    rev: git commit id

  Returns:
    timestamp (int), parsed from the output of `git log -1 --format=%ct`.
  """
  cmd = ['git', 'log', '-1', '--format=%ct', rev]
  output = util.check_output(*cmd, cwd=git_repo)
  return int(output)
166
167
def get_file_from_revision(git_repo, rev, path):
  """Reads the content of a file as of the given revision.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: file path

  Returns:
    file content (str), i.e. the output of `git show <rev>:<path>`.
  """
  blob_spec = '%s:%s' % (rev, path)
  return util.check_output(
      'git', 'show', blob_spec, cwd=git_repo, log_output=False)
181
182
def get_rev_by_time(git_repo, timestamp, *args):
  """Finds the commit at the given time.

  Args:
    git_repo: path of git repo.
    timestamp: timestamp
    args: only the selected subset of history to query. If branch name is
        specified, only parent of the said branch is queried. If omitted, only
        queries the parent of HEAD.

  Returns:
    git commit hash
  """
  # Fall back to HEAD when the caller did not select any history.
  revs = list(args) or ['HEAD']
  cmd = ['git', 'rev-list', '--first-parent', '-1', '--before', str(timestamp)]
  cmd.extend(revs)
  return util.check_output(*cmd, cwd=git_repo).strip()
207
208
def get_history(git_repo, path, after=None):
  """Get commit history of given path.

  This is a generator; the git command runs on first iteration.

  Args:
    git_repo: path of git repo.
    path: path to query, relative to git_repo
    after: limit history after given time

  Yields:
    (commit timestamp (int), git hash), oldest first.
  """
  cmd = ['git', 'log', '--reverse', '--first-parent', '--format=%ct %H']
  if after:
    cmd += ['--after', str(after)]
  # '--' marks everything after it as a path. Without it git refuses to run
  # with an "ambiguous argument" error if `path` does not exist in the current
  # checkout or happens to look like a revision name.
  cmd += ['--', path]
  for line in util.check_output(*cmd, cwd=git_repo).splitlines():
    commit_time, git_rev = line.split()
    yield int(commit_time), git_rev
227
228
class Diff(object):
  """Class to describe the difference between git commits.

  Attributes:
    timestamp: commit timestamp
    path: git repo path relative to project root
    action: action to make the diff, possible value: CHECKOUT_TO, ADD, REMOVE.
    git_rev: git commit hash
  """
  CHECKOUT_TO = 'checkout_to'
  ADD = 'add'
  REMOVE = 'remove'

  def __init__(self, timestamp, path, action, git_rev=None):
    self.timestamp = timestamp
    self.path = path
    self.action = action
    self.git_rev = git_rev

  def apply(self, base_dir):
    """Applies the diff on disk.

    Args:
      base_dir: the project root where self.path is relative to

    Raises:
      NotImplementedError: if the action is ADD or REMOVE.
      AssertionError: if self.path is empty or the action is unknown.
    """
    assert self.path
    git_repo = os.path.join(base_dir, self.path)
    if self.action == Diff.CHECKOUT_TO:
      checkout_version(git_repo, self.git_rev)
      return
    if self.action in (Diff.ADD, Diff.REMOVE):
      raise NotImplementedError
    # Raise explicitly instead of `assert 0`, so an unknown action is still
    # reported when assertions are disabled (python -O).
    raise AssertionError('unknown diff action: %r' % (self.action,))

  def summary(self, base_dir):
    """Summary string of this diff.

    Args:
      base_dir: the project root where self.path is relative to

    Returns:
      For CHECKOUT_TO, the abbreviated revision, the path and the first line
      of the commit log; otherwise the action and the path.
    """
    if self.action == Diff.CHECKOUT_TO:
      git_repo = os.path.join(base_dir, self.path)
      log_lines = get_commit_log(git_repo, self.git_rev).splitlines()
      # A commit log may be empty; fall back to an empty subject instead of
      # failing with IndexError.
      subject = log_lines[0] if log_lines else ''
      return '%s %s %r' % (self.git_rev[:10], self.path, subject)
    return '%s %s' % (self.action, self.path)

  def __eq__(self, rhs):
    # Let Python handle comparison against unrelated types instead of failing
    # with AttributeError.
    if not isinstance(rhs, Diff):
      return NotImplemented
    return (self.timestamp == rhs.timestamp and self.path == rhs.path and
            self.action == rhs.action and self.git_rev == rhs.git_rev)

  def __ne__(self, rhs):
    # Python 2 does not derive `!=` from `__eq__`, so define it explicitly to
    # keep the two operators consistent.
    result = self.__eq__(rhs)
    if result is NotImplemented:
      return result
    return not result
278
279
def list_commits_between_commits(git_repo, old, new):
  """Lists all commits in the range (old, new].

  Args:
    git_repo: path of git repo.
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of [timestamp, rev] pairs, oldest first. Timestamps may have been
    raised so that they never decrease along the list.
  """
  assert old and new
  assert old == new or is_ancestor_commit(git_repo, old, new)

  rev_range = '%s..%s' % (old, new)
  # --first-parent is necessary for Android, see following link for more
  # discussion.
  # https://docs.google.com/document/d/1c8qiq14_ObRRjLT62sk9r5V5cyCGHX66dLYab4MVnks/edit#heading=h.n3i6mt2n6xuu
  output = util.check_output(
      'git',
      'rev-list',
      '--timestamp',
      '--reverse',
      '--first-parent',
      rev_range,
      cwd=git_repo)

  commits = []
  for line in output.splitlines():
    fields = line.split()
    timestamp, git_rev = fields
    commits.append([int(timestamp), git_rev])

  # bisect-kit has a fundamental assumption that commit timestamps are
  # increasing because we sort and bisect the commits by timestamp across git
  # repos. If not increasing, we have to adjust the timestamp as workaround.
  # This might lead to bad bisect result, however the bad probability is low in
  # practice since most machines' clocks are good enough.
  out_of_order = any(
      later[0] < earlier[0] for earlier, later in zip(commits, commits[1:]))
  if out_of_order:
    logger.warning('Commit timestamps are not increasing')
    floor = -1
    adjusted = 0
    for entry in commits:
      if entry[0] < floor:
        # Clamp to the largest timestamp seen so far.
        entry[0] = floor
        adjusted += 1
      else:
        floor = entry[0]
    logger.warning('%d timestamps adjusted', adjusted)

  return commits
326
327
def get_difflist_between_two_commit(base_dir, path, old, new):
  """Builds the list of diffs for the range (old, new].

  Args:
    base_dir: the project root
    path: the path relative to the project root
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of Diff objects, one CHECKOUT_TO diff per commit in the range.
  """
  commits = list_commits_between_commits(
      os.path.join(base_dir, path), old, new)
  return [
      Diff(timestamp, path, Diff.CHECKOUT_TO, git_rev)
      for timestamp, git_rev in commits
  ]
345
346
def get_difflist_between_two_set(base_dir, old_set, new_set):
  """Builds the list of diffs between two sets of repo revisions.

  Args:
    base_dir: the project root
    old_set: mapping from repo path (relative to base_dir) to old commit hash
    new_set: mapping from repo path (relative to base_dir) to new commit hash

  Returns:
    list of Diff objects sorted by (timestamp, path). An empty list is
    returned if any repo was added, removed, or has an old revision which is
    not an ancestor of the new one, since those cases are not supported yet.
  """
  diffs = []
  for path in set(old_set) | set(new_set):
    in_old = path in old_set
    in_new = path in new_set

    if in_old and not in_new:
      # remove dependency
      # TODO(kcwu): handle removal properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was removed. Not supported yet',
                     path)
      return []

    if in_new and not in_old:
      # add dependency
      # TODO(kcwu): handle addition properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was added. Not supported yet',
                     path)
      return []

    old = old_set[path]
    new = new_set[path]
    if old == new:
      # nochange, do nothing
      continue

    git_repo = os.path.join(base_dir, path)
    if not is_ancestor_commit(git_repo, old, new):
      # maybe switch branch?
      # TODO(kcwu): handle discontinuous properly (crbug.com/827092)
      logger.warning(
          'Warning: dependency "%s" discontinuous. Not supported yet', path)
      return []

    # normal case
    diffs.extend(get_difflist_between_two_commit(base_dir, path, old, new))

  diffs.sort(key=lambda diff: (diff.timestamp, diff.path))
  return diffs
383 return result