blob: 98f3d4e7d49534ea9ba3be1df46ea7dd7455c905 [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4"""Git utility."""
5
6from __future__ import print_function
7import logging
Kuang-che Wubfc4a642018-04-19 11:54:08 +08008import os
Kuang-che Wue41e0062017-09-01 19:04:14 +08009import re
10import subprocess
11
12from bisect_kit import cli
13from bisect_kit import util
14
logger = logging.getLogger(__name__)

# Number of hex digits in a full-length git commit id.
GIT_FULL_COMMIT_ID_LENGTH = 40

# Shortest abbreviated git commit id that is still accepted.
#
# Hash collision rate observed for chromium, by number of digits:
#   6 digits -> 4.85%
#   7 digits -> 0.32%
#   8 digits -> 0.01%
# Requiring 7 digits is sufficient as a foolproof sanity check.
GIT_MIN_COMMIT_ID_LENGTH = 7
27
28
def is_git_rev(s):
  """Checks whether a string looks like a (possibly shortened) git hash.

  Only lowercase hexadecimal digits are accepted, and the length must be
  between GIT_MIN_COMMIT_ID_LENGTH and GIT_FULL_COMMIT_ID_LENGTH (inclusive).

  Args:
    s: string to examine.

  Returns:
    True if `s` is hash-like, False otherwise.
  """
  if not GIT_MIN_COMMIT_ID_LENGTH <= len(s) <= GIT_FULL_COMMIT_ID_LENGTH:
    return False
  # Anchor the end with \Z rather than $: `$` also matches right before a
  # trailing newline, which would let strings such as 'abcdef1\n' pass.
  return bool(re.match(r'^[0-9a-f]+\Z', s))
37
38
def argtype_git_rev(s):
  """Argument validator for git hashes.

  Args:
    s: the string to validate.

  Returns:
    `s` itself, unchanged, when is_git_rev() accepts it.

  Raises:
    cli.ArgTypeError: if `s` is not a hash-like string. The second argument
        passed to the exception ('1a2b3c4d5e') is presumably an example of
        valid input -- confirm against cli.ArgTypeError.
  """
  if is_git_rev(s):
    return s
  raise cli.ArgTypeError(
      'should be git hash, at least %d digits' % GIT_MIN_COMMIT_ID_LENGTH,
      '1a2b3c4d5e')
45
46
def is_git_root(path):
  """Tells whether `path` is the top-level folder of a git checkout.

  The check is purely file-system based: it looks for an entry named '.git'
  directly under `path`.

  Args:
    path: folder to examine.

  Returns:
    True if `path`/.git exists, False otherwise.
  """
  dot_git = os.path.join(path, '.git')
  return os.path.exists(dot_git)
50
51
def checkout_version(git_repo, rev):
  """Checks out `rev` in the given repo via `git checkout -q -f`.

  Args:
    git_repo: path of git repo.
    rev: git commit revision to checkout.
  """
  cmd = ['git', 'checkout', '-q', '-f', rev]
  util.check_call(*cmd, cwd=git_repo)
60
61
def is_containing_commit(git_repo, rev):
  """Tells whether `rev` names a commit object inside the given repo.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    True if `git cat-file -t` reports `rev` as a commit. False if it is some
    other object type, if the git command fails, or if it cannot be run at all
    (OSError) -- e.g. when git_repo is not a git folder.
  """
  try:
    object_type = util.check_output(
        'git', 'cat-file', '-t', rev, cwd=git_repo)
  except (subprocess.CalledProcessError, OSError):
    return False
  return object_type == 'commit\n'
Kuang-che Wue41e0062017-09-01 19:04:14 +080080
81
def is_ancestor_commit(git_repo, old, new):
  """Tells whether `old` is a strict ancestor of `new`.

  Args:
    git_repo: path of git repo.
    old: the ancestor commit.
    new: the descendant commit.

  Returns:
    True only if `old` is the ancestor of `new`. A commit does not count as an
    ancestor of itself.
  """
  rev_range = '%s..%s' % (old, new)
  # Asking for at most one commit is enough: any output at all means the
  # ancestry path from `old` to `new` is non-empty.
  first_hit = util.check_output(
      'git', 'rev-list', '--ancestry-path', '-1', rev_range, cwd=git_repo)
  return first_hit != ''
101
102
def get_revlist(git_repo, old, new):
  """Lists the git commits from `old` to `new`, both ends included.

  Args:
    git_repo: path of git repo.
    old: git commit revision.
    new: git commit revision.

  Returns:
    list of git revisions as printed by `git rev-list --reverse`. The list
    contains the input revisions, old and new.
  """
  assert old
  assert new
  # The range starts at `old^` so that `old` itself is part of the output.
  # NOTE(review): this presumably requires `old` to have a parent -- confirm.
  rev_range = '%s^..%s' % (old, new)
  output = util.check_output(
      'git', 'rev-list', '--reverse', rev_range, cwd=git_repo)
  return output.splitlines()
Kuang-che Wue2563ea2018-01-05 20:30:28 +0800119
120
def get_commit_log(git_repo, rev):
  """Fetches the commit message of one revision.

  Args:
    git_repo: path of git repo.
    rev: git commit revision.

  Returns:
    the raw output of `git log -1 --format=%B` for `rev`, i.e. the commit log
    message.
  """
  return util.check_output(
      'git', 'log', '-1', '--format=%B', rev, cwd=git_repo)
134
135
def get_commit_hash(git_repo, rev):
  """Resolves a revision name to the full hash of the commit it denotes.

  Args:
    git_repo: path of git repo.
    rev: could be git tag, branch, or (shortened) commit hash

  Returns:
    full git commit hash
  """
  # Peel to a commit with '^{commit}': for an annotated tag, a plain
  # `git rev-parse <tag>` prints the id of the tag object itself rather than
  # the commit it points to.
  cmd = ['git', 'rev-parse', '%s^{commit}' % rev]
  git_rev = util.check_output(*cmd, cwd=git_repo).strip()
  assert git_rev
  return git_rev
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800150
151
def get_commit_time(git_repo, rev):
  """Fetches the commit timestamp of one revision.

  Args:
    git_repo: path of git repo
    rev: git commit id

  Returns:
    timestamp (int), parsed from the output of `git log -1 --format=%ct`
  """
  cmd = ['git', 'log', '-1', '--format=%ct', rev]
  return int(util.check_output(*cmd, cwd=git_repo))
165
166
def get_file_from_revision(git_repo, rev, path):
  """Reads a file as it was at the given revision.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: file path

  Returns:
    file content (str), i.e. the output of `git show <rev>:<path>`
  """
  blob_spec = '%s:%s' % (rev, path)
  # log_output=False presumably keeps the file content out of the logs --
  # confirm against util.check_output.
  return util.check_output(
      'git', 'show', blob_spec, cwd=git_repo, log_output=False)
180
181
def get_rev_by_time(git_repo, timestamp, *args):
  """Finds the latest commit made before the given time.

  Args:
    git_repo: path of git repo.
    timestamp: timestamp
    args: extra arguments appended to the `git log` command line to select
        the subset of history to query, e.g. a branch name. If omitted, git
        log's default history selection is used.

  Returns:
    git commit hash, with surrounding whitespace stripped
  """
  cmd = ['git', 'log', '-1', '--before', str(timestamp), '--format=%H']
  cmd.extend(args)
  return util.check_output(*cmd, cwd=git_repo).strip()
204
205
def get_history(git_repo, path, after=None):
  """Get commit history of given path.

  Args:
    git_repo: path of git repo.
    path: path to query, relative to git_repo
    after: limit history after given time; no limit if omitted

  Yields:
    (commit timestamp as int, git hash) pairs, in the order printed by
    `git log --reverse`.
  """
  cmd = ['git', 'log', '--reverse', '--format=%ct %H']
  if after:
    cmd += ['--after', str(after)]
  # '--' marks everything after it as a path. Without it git complains about
  # an ambiguous argument when `path` does not exist in the current checkout,
  # or may interpret `path` as a revision name.
  cmd += ['--', path]
  for line in util.check_output(*cmd, cwd=git_repo).splitlines():
    commit_time, git_rev = line.split()
    yield int(commit_time), git_rev
224
225
class Diff(object):
  """Class to describe the difference between git commits.

  Attributes:
    timestamp: commit timestamp
    path: git repo path relative to project root
    action: action to make the diff, possible value: CHECKOUT_TO, ADD, REMOVE.
    git_rev: git commit hash
  """
  CHECKOUT_TO = 'checkout_to'
  ADD = 'add'
  REMOVE = 'remove'

  def __init__(self, timestamp, path, action, git_rev=None):
    self.timestamp = timestamp
    self.path = path
    self.action = action
    self.git_rev = git_rev

  def apply(self, base_dir):
    """Applies the diff on disk.

    Args:
      base_dir: the project root where self.path is relative to

    Raises:
      NotImplementedError: if the action is ADD or REMOVE.
      AssertionError: if the action is not one of the known values.
    """
    assert self.path
    git_repo = os.path.join(base_dir, self.path)
    if self.action == Diff.CHECKOUT_TO:
      checkout_version(git_repo, self.git_rev)
      return
    if self.action in (Diff.ADD, Diff.REMOVE):
      raise NotImplementedError
    # Raised explicitly (instead of `assert 0`) so that an unknown action is
    # still reported when Python runs with -O, which strips assert statements.
    raise AssertionError('unknown action: %r' % (self.action,))

  def summary(self, base_dir):
    """Summary string of this diff.

    Args:
      base_dir: the project root where self.path is relative to

    Returns:
      For CHECKOUT_TO: shortened hash, path and the first line of the commit
      message. For other actions: the action name and path.
    """
    if self.action == Diff.CHECKOUT_TO:
      git_repo = os.path.join(base_dir, self.path)
      lines = get_commit_log(git_repo, self.git_rev).splitlines()
      # The commit message may be empty; fall back to an empty summary
      # instead of failing with IndexError.
      summary = lines[0] if lines else ''
      return '%s %s %r' % (self.git_rev[:10], self.path, summary)
    return '%s %s' % (self.action, self.path)

  def __repr__(self):
    return 'Diff(timestamp=%r, path=%r, action=%r, git_rev=%r)' % (
        self.timestamp, self.path, self.action, self.git_rev)

  def __eq__(self, rhs):
    # Let Python handle comparison against unrelated types instead of
    # failing with AttributeError.
    if not isinstance(rhs, Diff):
      return NotImplemented
    return (self.timestamp == rhs.timestamp and self.path == rhs.path and
            self.action == rhs.action and self.git_rev == rhs.git_rev)

  def __ne__(self, rhs):
    # Python 2 does not derive != from __eq__; define it explicitly so that
    # == and != always agree.
    result = self.__eq__(rhs)
    if result is NotImplemented:
      return result
    return not result
275
276
def list_commits_between_commits(git_repo, old, new):
  """Lists the first-parent commits in the range (old, new].

  Args:
    git_repo: path of git repo.
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of [timestamp, rev] pairs in `git rev-list --reverse` order. The
    timestamps are adjusted, when necessary, so that they never decrease
    along the list.
  """
  assert old and new
  assert old == new or is_ancestor_commit(git_repo, old, new)

  # --first-parent is necessary for Android, see following link for more
  # discussion.
  # https://docs.google.com/document/d/1c8qiq14_ObRRjLT62sk9r5V5cyCGHX66dLYab4MVnks/edit#heading=h.n3i6mt2n6xuu
  output = util.check_output(
      'git',
      'rev-list',
      '--timestamp',
      '--reverse',
      '--first-parent',
      '%s..%s' % (old, new),
      cwd=git_repo)
  commits = [[int(timestamp), git_rev]
             for timestamp, git_rev in (
                 line.split() for line in output.splitlines())]

  # bisect-kit has a fundamental assumption that commit timestamps are
  # increasing because we sort and bisect the commits by timestamp across git
  # repos. If not increasing, we have to adjust the timestamp as workaround.
  # This might lead to bad bisect result, however the bad probability is low in
  # practice since most machines' clocks are good enough.
  timestamps = [entry[0] for entry in commits]
  if timestamps != sorted(timestamps):
    logger.warning('Commit timestamps are not increasing')
    adjusted = 0
    latest = -1
    for entry in commits:
      if entry[0] < latest:
        # Clamp a timestamp that goes back in time to the latest one seen.
        entry[0] = latest
        adjusted += 1
      else:
        latest = entry[0]
    logger.warning('%d timestamps adjusted', adjusted)

  return commits
323
324
def get_difflist_between_two_commit(base_dir, path, old, new):
  """Builds one CHECKOUT_TO Diff per commit in the range (old, new].

  Args:
    base_dir: the project root
    path: the path relative to the project root
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of Diff objects, in the order given by
    list_commits_between_commits()
  """
  git_repo = os.path.join(base_dir, path)
  commits = list_commits_between_commits(git_repo, old, new)
  return [
      Diff(timestamp, path, Diff.CHECKOUT_TO, git_rev)
      for timestamp, git_rev in commits
  ]
342
343
def get_difflist_between_two_set(base_dir, old_set, new_set):
  """Get difflist between two sets of git repo revisions.

  Args:
    base_dir: the project root
    old_set: mapping from repo path (relative to base_dir) to old commit hash
    new_set: mapping from repo path (relative to base_dir) to new commit hash

  Returns:
    list of Diff objects sorted by (timestamp, path). An empty list is
    returned if nothing changed, and also (after logging a warning) if any
    repo was added, removed, or moved to a commit which is not a descendant
    of the old one, because those cases are not supported yet.
  """
  result = []
  # Iterate in sorted order so that which warning gets logged is reproducible
  # when more than one path is unsupported.
  for path in sorted(set(old_set) | set(new_set)):
    if path in old_set and path in new_set:
      old = old_set[path]
      new = new_set[path]
      if old == new:
        # nochange, do nothing
        continue
      if is_ancestor_commit(os.path.join(base_dir, path), old, new):
        # normal case
        result.extend(
            get_difflist_between_two_commit(base_dir, path, old, new))
      else:
        # maybe switch branch?
        # TODO(kcwu): handle discontinuous properly (crbug.com/827092)
        logger.warning(
            'Warning: dependency "%s" discontinuous. Not supported yet', path)
        return []
    elif path in old_set:
      # remove dependency
      # TODO(kcwu): handle removal properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was removed. Not supported yet',
                     path)
      return []
    else:
      assert path in new_set
      # add dependency
      # TODO(kcwu): handle addition properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was added. Not supported yet',
                     path)
      return []

  result.sort(key=lambda diff: (diff.timestamp, diff.path))

  return result
380 return result