blob: e4b4e78015a926220a33578fc88e9bda023d7709 [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5"""Git utility."""
6
7from __future__ import print_function
8import logging
Kuang-che Wubfc4a642018-04-19 11:54:08 +08009import os
Kuang-che Wue41e0062017-09-01 19:04:14 +080010import re
11import subprocess
12
13from bisect_kit import cli
14from bisect_kit import util
15
16logger = logging.getLogger(__name__)
17
GIT_FULL_COMMIT_ID_LENGTH = 40

# Minimal acceptable length of git commit id.
#
# For chromium, hash collision rate over number of digits:
# - 6 digits: 4.85%
# - 7 digits: 0.32%
# - 8 digits: 0.01%
# As foolproof check, 7 digits should be enough.
GIT_MIN_COMMIT_ID_LENGTH = 7


def is_git_rev(s):
  """Is a git hash-like version string.

  It accepts shortened hash with at least GIT_MIN_COMMIT_ID_LENGTH (7) digits
  and at most GIT_FULL_COMMIT_ID_LENGTH (40) digits. Only lowercase
  hexadecimal digits are accepted.

  Args:
    s: string to check.

  Returns:
    True if `s` looks like a (possibly shortened) git commit hash.
  """
  if not GIT_MIN_COMMIT_ID_LENGTH <= len(s) <= GIT_FULL_COMMIT_ID_LENGTH:
    return False
  # Anchor with \Z instead of $: '$' also matches just before a trailing
  # newline, which would wrongly accept strings such as '1a2b3c4\n'.
  return bool(re.match(r'^[0-9a-f]+\Z', s))
38
39
def argtype_git_rev(s):
  """Argument type checker for git hashes.

  Args:
    s: command line argument value to validate.

  Returns:
    `s` unchanged if is_git_rev() accepts it.

  Raises:
    cli.ArgTypeError: if `s` is not a git hash-like string. The second
        argument passed to the exception, '1a2b3c4d5e', is presumably an
        example of a valid value (confirm against cli.ArgTypeError).
  """
  if is_git_rev(s):
    return s
  raise cli.ArgTypeError(
      'should be git hash, at least %d digits' % GIT_MIN_COMMIT_ID_LENGTH,
      '1a2b3c4d5e')
46
47
def is_git_root(path):
  """Tells whether `path` is the root of a git repo.

  Args:
    path: directory to examine.

  Returns:
    True if an entry named '.git' (file or directory) exists directly under
    `path`.
  """
  dot_git = os.path.join(path, '.git')
  return os.path.exists(dot_git)
51
52
def checkout_version(git_repo, rev):
  """Runs `git checkout -q -f <rev>` inside the given repo.

  The checkout is quiet (-q) and forced (-f), so local modifications in the
  working tree do not block it.

  Args:
    git_repo: path of git repo.
    rev: git commit revision to checkout.
  """
  cmd = ['git', 'checkout', '-q', '-f', rev]
  util.check_call(*cmd, cwd=git_repo)
61
62
def is_containing_commit(git_repo, rev):
  """Checks whether the given commit exists in the repo.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    True if `git cat-file -t` reports `rev` as a commit object in git_repo.
    False otherwise, including when the git command fails or cannot be
    launched at all (e.g. git_repo is not a git folder).
  """
  try:
    object_type = util.check_output(
        'git', 'cat-file', '-t', rev, cwd=git_repo)
  except (subprocess.CalledProcessError, OSError):
    # Either git rejected the query or the process could not be started.
    return False
  return object_type == 'commit\n'
Kuang-che Wue41e0062017-09-01 19:04:14 +080081
82
def is_ancestor_commit(git_repo, old, new):
  """Tells whether `old` commit is an ancestor of `new` commit.

  Args:
    git_repo: path of git repo.
    old: the ancestor commit.
    new: the descendant commit.

  Returns:
    True only if `old` is the ancestor of `new`. One commit is not considered
    as ancestor of itself.
  """
  commit_range = '%s..%s' % (old, new)
  # Ask for at most one commit on the ancestry path; any output at all means
  # such a path exists.
  output = util.check_output(
      'git', 'rev-list', '--ancestry-path', '-1', commit_range, cwd=git_repo)
  return output != ''
102
103
def get_revlist(git_repo, old, new):
  """Lists git commits between two revisions (inclusive).

  Args:
    git_repo: path of git repo.
    old: git commit revision.
    new: git commit revision.

  Returns:
    list of git revisions, oldest first (`git rev-list --reverse`). The list
    contains the input revisions, old and new.
  """
  assert old
  assert new
  # 'old^..new' starts from the parent of `old`, so `old` itself is included.
  rev_range = '%s^..%s' % (old, new)
  output = util.check_output(
      'git', 'rev-list', '--reverse', rev_range, cwd=git_repo)
  return output.splitlines()
Kuang-che Wue2563ea2018-01-05 20:30:28 +0800120
121
def get_commit_log(git_repo, rev):
  """Fetches the commit message of one commit.

  Args:
    git_repo: path of git repo.
    rev: git commit revision.

  Returns:
    commit log message (raw body, as printed by `git log --format=%B`)
  """
  return util.check_output(
      'git', 'log', '-1', '--format=%B', rev, cwd=git_repo)
135
136
def get_commit_hash(git_repo, rev):
  """Get git commit hash.

  Args:
    git_repo: path of git repo.
    rev: could be git tag, branch, or (shortened) commit hash

  Returns:
    full git commit hash

  Raises:
    Whatever util.check_output raises when the git command fails, e.g. when
    `rev` does not name exactly one commit.
  """
  # '^{commit}' peels the revision down to a commit. Without it, an annotated
  # tag resolves to the id of the tag object rather than the tagged commit.
  # '--verify' makes git fail unless exactly one object is named.
  cmd = ['git', 'rev-parse', '--verify', '%s^{commit}' % rev]
  git_rev = util.check_output(*cmd, cwd=git_repo).strip()
  assert git_rev
  return git_rev
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800151
152
def get_commit_time(git_repo, rev):
  """Fetches the commit timestamp of one commit.

  Args:
    git_repo: path of git repo
    rev: git commit id

  Returns:
    timestamp (int), parsed from `git log --format=%ct`
  """
  cmd = ['git', 'log', '-1', '--format=%ct', rev]
  output = util.check_output(*cmd, cwd=git_repo)
  return int(output)
166
167
def get_file_from_revision(git_repo, rev, path):
  """Reads the content of a file as of the given revision.

  Args:
    git_repo: path of git repo
    rev: git commit id
    path: file path

  Returns:
    file content (str), the output of `git show <rev>:<path>`
  """
  blob_spec = '%s:%s' % (rev, path)
  # log_output=False is forwarded to util.check_output; presumably it keeps
  # the (possibly large) file content out of the logs.
  return util.check_output(
      'git', 'show', blob_spec, cwd=git_repo, log_output=False)
181
182
def get_rev_by_time(git_repo, timestamp, *args):
  """Finds the latest commit made before the given time.

  Args:
    git_repo: path of git repo.
    timestamp: timestamp
    args: only the selected subset of history to query. If branch name is
        specified, only parent of the said branch is queried. If omitted, only
        queries the parent of current working directory.

  Returns:
    git commit hash
  """
  cmd = ['git', 'log', '-1', '--before', str(timestamp), '--format=%H']
  # Extra arguments are appended verbatim after the options.
  cmd.extend(args)
  output = util.check_output(*cmd, cwd=git_repo)
  return output.strip()
205
206
def get_history(git_repo, path, after=None):
  """Get commit history of given path.

  Args:
    git_repo: path of git repo.
    path: path to query, relative to git_repo
    after: limit history after given time

  Yields:
    commit timestamp, git hash (oldest commit first)
  """
  cmd = ['git', 'log', '--reverse', '--format=%ct %H']
  if after:
    cmd += ['--after', str(after)]
  # '--' is necessary: if `path` does not exist in the current working tree
  # (e.g. it was removed later) or collides with a ref name, git complains
  # that the argument is ambiguous instead of treating it as a path.
  cmd += ['--', path]
  for line in util.check_output(*cmd, cwd=git_repo).splitlines():
    commit_time, git_rev = line.split()
    yield int(commit_time), git_rev
225
226
class Diff(object):
  """Class to describe the difference between git commits.

  Attributes:
    timestamp: commit timestamp
    path: git repo path relative to project root
    action: action to make the diff, possible value: CHECKOUT_TO, ADD, REMOVE.
    git_rev: git commit hash
  """
  CHECKOUT_TO = 'checkout_to'
  ADD = 'add'
  REMOVE = 'remove'

  def __init__(self, timestamp, path, action, git_rev=None):
    self.timestamp = timestamp
    self.path = path
    self.action = action
    self.git_rev = git_rev

  def __repr__(self):
    return 'Diff(timestamp=%r, path=%r, action=%r, git_rev=%r)' % (
        self.timestamp, self.path, self.action, self.git_rev)

  def apply(self, base_dir):
    """Applies the diff on disk.

    Args:
      base_dir: the project root where self.path is relative to

    Raises:
      NotImplementedError: the action is ADD or REMOVE, which are not
          supported yet.
      AssertionError: self.path is empty or the action is unknown.
    """
    assert self.path
    git_repo = os.path.join(base_dir, self.path)
    if self.action == Diff.CHECKOUT_TO:
      checkout_version(git_repo, self.git_rev)
      return
    if self.action in (Diff.ADD, Diff.REMOVE):
      raise NotImplementedError(
          'diff action %r is not supported yet' % (self.action,))
    # Raise explicitly (instead of `assert 0`) so that an unknown action is
    # still reported when python runs with -O.
    raise AssertionError('unknown diff action: %r' % (self.action,))

  def summary(self, base_dir):
    """Summary string of this diff.

    Args:
      base_dir: the project root where self.path is relative to

    Returns:
      For CHECKOUT_TO, a string made of the first 10 digits of git_rev, the
      path and the repr of the first line of the commit message. Otherwise,
      a string made of the action and the path.
    """
    if self.action == Diff.CHECKOUT_TO:
      git_repo = os.path.join(base_dir, self.path)
      log_lines = get_commit_log(git_repo, self.git_rev).splitlines()
      # A commit may have an empty message; do not crash on it.
      subject = log_lines[0] if log_lines else ''
      return '%s %s %r' % (self.git_rev[:10], self.path, subject)
    return '%s %s' % (self.action, self.path)

  def __eq__(self, rhs):
    if not isinstance(rhs, Diff):
      # Let python fall back to its default handling for foreign types
      # instead of failing with AttributeError.
      return NotImplemented
    return (self.timestamp == rhs.timestamp and self.path == rhs.path and
            self.action == rhs.action and self.git_rev == rhs.git_rev)

  def __ne__(self, rhs):
    # Python 2 does not derive != from ==, so define it explicitly to keep
    # both operators consistent.
    equal = self.__eq__(rhs)
    if equal is NotImplemented:
      return equal
    return not equal
276
277
def list_commits_between_commits(git_repo, old, new):
  """Lists all commits in the range (old, new].

  Only the first-parent chain is followed. If the commit timestamps are not
  non-decreasing along the chain, offending timestamps are raised to the
  previous commit's timestamp and warnings are logged.

  Args:
    git_repo: path of git repo.
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of [timestamp, rev] pairs, oldest first
  """
  assert old and new
  assert old == new or is_ancestor_commit(git_repo, old, new)

  # --first-parent is necessary for Android, see following link for more
  # discussion.
  # https://docs.google.com/document/d/1c8qiq14_ObRRjLT62sk9r5V5cyCGHX66dLYab4MVnks/edit#heading=h.n3i6mt2n6xuu
  output = util.check_output(
      'git',
      'rev-list',
      '--timestamp',
      '--reverse',
      '--first-parent',
      '%s..%s' % (old, new),
      cwd=git_repo)

  commits = []
  for line in output.splitlines():
    timestamp, git_rev = line.split()
    commits.append([int(timestamp), git_rev])

  # bisect-kit has a fundamental assumption that commit timestamps are
  # increasing because we sort and bisect the commits by timestamp across git
  # repos. If not increasing, we have to adjust the timestamp as workaround.
  # This might lead to bad bisect result, however the bad probability is low in
  # practice since most machines' clocks are good enough.
  out_of_order = any(
      later[0] < earlier[0] for earlier, later in zip(commits, commits[1:]))
  if out_of_order:
    logger.warning('Commit timestamps are not increasing')
    floor = -1
    adjusted = 0
    for entry in commits:
      if entry[0] < floor:
        # Clamp to the newest timestamp seen so far.
        entry[0] = floor
        adjusted += 1
      else:
        floor = entry[0]
    logger.warning('%d timestamps adjusted', adjusted)

  return commits
324
325
def get_difflist_between_two_commit(base_dir, path, old, new):
  """Builds the list of diffs for commits in (old, new].

  Args:
    base_dir: the project root
    path: the path relative to the project root
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of Diff objects, one CHECKOUT_TO diff per commit returned by
    list_commits_between_commits()
  """
  git_repo = os.path.join(base_dir, path)
  commits = list_commits_between_commits(git_repo, old, new)
  return [
      Diff(timestamp, path, Diff.CHECKOUT_TO, git_rev)
      for timestamp, git_rev in commits
  ]
343
344
def get_difflist_between_two_set(base_dir, old_set, new_set):
  """Builds the list of diffs between two sets of git repos.

  Args:
    base_dir: the project root
    old_set: mapping from repo path (relative to base_dir) to the old commit
    new_set: mapping from repo path (relative to base_dir) to the new commit

  Returns:
    list of Diff objects sorted by (timestamp, path). An empty list is
    returned (after logging a warning) as soon as an unsupported case is met:
    a repo was added, a repo was removed, or the old commit of a repo is not
    an ancestor of its new commit.
  """
  difflist = []
  for path in set(old_set) | set(new_set):
    git_repo = os.path.join(base_dir, path)

    if path not in old_set:
      # add dependency
      # TODO(kcwu): handle addition properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was added. Not supported yet',
                     path)
      return []

    if path not in new_set:
      # remove dependency
      # TODO(kcwu): handle removal properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was removed. Not supported yet',
                     path)
      return []

    old = old_set[path]
    new = new_set[path]
    if old == new:
      # nochange, do nothing
      continue

    if not is_ancestor_commit(git_repo, old, new):
      # maybe switch branch?
      # TODO(kcwu): handle discontinuous properly (crbug.com/827092)
      logger.warning(
          'Warning: dependency "%s" discontinuous. Not supported yet', path)
      return []

    # normal case
    difflist.extend(get_difflist_between_two_commit(base_dir, path, old, new))

  difflist.sort(key=lambda diff: (diff.timestamp, diff.path))
  return difflist
381 return result