blob: b2d256157cff3b145a82caa066194bfeca0d8bed [file] [log] [blame]
Kuang-che Wue41e0062017-09-01 19:04:14 +08001# Copyright 2017 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Git utility."""
5
6from __future__ import print_function
7import logging
Kuang-che Wubfc4a642018-04-19 11:54:08 +08008import os
Kuang-che Wue41e0062017-09-01 19:04:14 +08009import re
10import subprocess
11
12from bisect_kit import cli
13from bisect_kit import util
14
logger = logging.getLogger(__name__)

# Length of a full (unabbreviated) git commit id, in hex digits.
GIT_FULL_COMMIT_ID_LENGTH = 40

# Minimal acceptable length of git commit id.
#
# For chromium, hash collision rate over number of digits:
# - 6 digits: 4.85%
# - 7 digits: 0.32%
# - 8 digits: 0.01%
# As foolproof check, 7 digits should be enough.
GIT_MIN_COMMIT_ID_LENGTH = 7


def is_git_rev(s):
  """Is a git hash-like version string.

  It accepts shortened hash with at least GIT_MIN_COMMIT_ID_LENGTH digits and
  at most GIT_FULL_COMMIT_ID_LENGTH digits. Only lowercase hex digits are
  accepted.

  Args:
    s: string to examine.

  Returns:
    True if `s` consists solely of lowercase hex digits and its length is
    within the accepted range; False otherwise.
  """
  if not GIT_MIN_COMMIT_ID_LENGTH <= len(s) <= GIT_FULL_COMMIT_ID_LENGTH:
    return False
  # Anchor the end with \Z instead of $: $ also matches right before a trailing
  # newline, which would wrongly accept strings like '1234567\n'.
  return bool(re.match(r'[0-9a-f]+\Z', s))
37
38
def argtype_git_rev(s):
  """Argument type checker for git hashes.

  Args:
    s: the string to validate.

  Returns:
    `s` unchanged when is_git_rev() accepts it.

  Raises:
    cli.ArgTypeError: if `s` is not a git hash-like string.
  """
  if is_git_rev(s):
    return s
  raise cli.ArgTypeError(
      'should be git hash, at least %d digits' % GIT_MIN_COMMIT_ID_LENGTH,
      '1a2b3c4d5e')
45
46
def checkout_version(git_repo, rev):
  """Runs `git checkout -q -f` for the given revision.

  Args:
    git_repo: path of git repo; used as the working directory of the command.
    rev: git commit revision to checkout.
  """
  cmd = ['git', 'checkout', '-q', '-f', rev]
  util.check_call(*cmd, cwd=git_repo)
55
56
def is_containing_commit(git_repo, rev):
  """Tells whether `rev` names a commit object in the repo.

  Args:
    git_repo: path of git repo.
    rev: git commit revision in query.

  Returns:
    True if `git cat-file -t` reports the object type as commit. False if the
    type is anything else or the git command fails.
  """
  try:
    object_type = util.check_output(
        'git', 'cat-file', '-t', rev, cwd=git_repo)
  except subprocess.CalledProcessError:
    # git exits with non-zero status, e.g. when the object cannot be resolved.
    return False
  return object_type == 'commit\n'
69
70
def is_ancestor_commit(git_repo, old, new):
  """Checks whether `old` is an ancestor of `new`.

  Args:
    git_repo: path of git repo.
    old: the ancestor commit.
    new: the descendant commit.

  Returns:
    True only if `old` is the ancestor of `new`. One commit is not considered
    as ancestor of itself.
  """
  # Ask for at most one commit on the ancestry path of old..new; any output
  # at all means such a path exists.
  cmd = ['git', 'rev-list', '--ancestry-path', '-1', '%s..%s' % (old, new)]
  output = util.check_output(*cmd, cwd=git_repo)
  return output != ''
90
91
def get_revlist(git_repo, old, new):
  """Lists git commits between two revisions, both ends included.

  Args:
    git_repo: path of git repo.
    old: git commit revision.
    new: git commit revision.

  Returns:
    list of git revisions as printed by `git rev-list --reverse`. The list
    contains the input revisions, old and new.
  """
  assert old
  assert new
  # Start the range from the parent of `old` so that `old` itself is included.
  rev_range = '%s^..%s' % (old, new)
  output = util.check_output(
      'git', 'rev-list', '--reverse', rev_range, cwd=git_repo)
  return output.splitlines()
Kuang-che Wue2563ea2018-01-05 20:30:28 +0800108
109
def get_commit_log(git_repo, rev):
  """Fetches the log message of one commit.

  Args:
    git_repo: path of git repo.
    rev: git commit revision.

  Returns:
    commit log message, i.e. the output of `git log -1 --format=%B`.
  """
  return util.check_output(
      'git', 'log', '-1', '--format=%B', rev, cwd=git_repo)
123
124
def get_commit_hash(git_repo, rev):
  """Get git commit hash.

  Args:
    git_repo: path of git repo.
    rev: could be git tag, branch, or (shortened) commit hash

  Returns:
    full git commit hash
  """
  # Peel `rev` to the commit it refers to. For an annotated tag, plain
  # `git rev-parse TAG` prints the id of the tag object rather than the commit,
  # which contradicts the "commit hash" contract of this function. For
  # branches and commit hashes the result is unchanged.
  cmd = ['git', 'rev-parse', '%s^{commit}' % rev]
  git_rev = util.check_output(*cmd, cwd=git_repo).strip()
  assert git_rev
  return git_rev
Kuang-che Wubfc4a642018-04-19 11:54:08 +0800139
140
class Diff(object):
  """Class to describe the difference between git commits.

  Attributes:
    timestamp: commit timestamp
    path: git repo path relative to project root
    action: action to make the diff, possible value: CHECKOUT_TO, ADD, REMOVE.
    git_rev: git commit hash
  """
  CHECKOUT_TO = 'checkout_to'
  ADD = 'add'
  REMOVE = 'remove'

  def __init__(self, timestamp, path, action, git_rev=None):
    self.timestamp = timestamp
    self.path = path
    self.action = action
    self.git_rev = git_rev

  def apply(self, base_dir):
    """Applies the diff on disk.

    Args:
      base_dir: the project root where self.path is relative to

    Raises:
      NotImplementedError: if the action is ADD or REMOVE.
      AssertionError: if the action is not one of the known values.
    """
    assert self.path
    git_repo = os.path.join(base_dir, self.path)
    if self.action == Diff.CHECKOUT_TO:
      checkout_version(git_repo, self.git_rev)
      return
    if self.action in (Diff.ADD, Diff.REMOVE):
      raise NotImplementedError
    # Raise explicitly (instead of `assert 0`) so that unknown actions are
    # still rejected when assertions are stripped by `python -O`.
    raise AssertionError('unknown diff action: %r' % (self.action,))

  def summary(self, base_dir):
    """Summary string of this diff.

    Args:
      base_dir: the project root where self.path is relative to

    Returns:
      For CHECKOUT_TO, a string made of the abbreviated hash, the path and the
      first line of the commit message; otherwise the action and the path.
    """
    if self.action == Diff.CHECKOUT_TO:
      git_repo = os.path.join(base_dir, self.path)
      log_lines = get_commit_log(git_repo, self.git_rev).splitlines()
      # A commit message may be empty; fall back to an empty summary rather
      # than failing with IndexError.
      summary = log_lines[0] if log_lines else ''
      return '%s %s %r' % (self.git_rev[:10], self.path, summary)
    return '%s %s' % (self.action, self.path)

  def __eq__(self, rhs):
    # Let Python try the other operand (and eventually fall back to identity)
    # instead of failing with AttributeError on unrelated types.
    if not isinstance(rhs, Diff):
      return NotImplemented
    return (self.timestamp == rhs.timestamp and self.path == rhs.path and
            self.action == rhs.action and self.git_rev == rhs.git_rev)

  def __ne__(self, rhs):
    # Python 2 does not derive `!=` from `__eq__`, so define it explicitly to
    # keep `==` and `!=` consistent.
    result = self.__eq__(rhs)
    if result is NotImplemented:
      return result
    return not result
190
191
def get_difflist_between_two_commit(base_dir, path, old, new):
  """Builds the list of checkout diffs for commits in (old, new].

  Args:
    base_dir: the project root
    path: the path relative to the project root
    old: old commit hash (exclusive)
    new: new commit hash (inclusive)

  Returns:
    list of Diff objects, one CHECKOUT_TO diff per commit, in the order
    printed by `git rev-list --reverse`. Timestamps are adjusted to be
    non-decreasing.
  """
  git_repo = os.path.join(base_dir, path)
  assert old and new
  assert old == new or is_ancestor_commit(git_repo, old, new)

  # --first-parent is necessary for Android, see following link for more
  # discussion.
  # https://docs.google.com/document/d/1c8qiq14_ObRRjLT62sk9r5V5cyCGHX66dLYab4MVnks/edit#heading=h.n3i6mt2n6xuu
  cmd = [
      'git',
      'rev-list',
      '--timestamp',
      '--reverse',
      '--first-parent',
      '%s..%s' % (old, new),
  ]
  entries = util.check_output(*cmd, cwd=git_repo).splitlines()
  # Each output line is "<timestamp> <commit hash>".
  difflist = [
      Diff(int(timestamp), path, Diff.CHECKOUT_TO, git_rev)
      for timestamp, git_rev in (entry.split() for entry in entries)
  ]

  # bisect-kit has a fundamental assumption that commit timestamps are
  # increasing because we sort and bisect the commits by timestamp across git
  # repos. If not increasing, we have to adjust the timestamp as workaround.
  # This might lead to bad bisect result, however the bad probability is low in
  # practice since most machines' clocks are good enough.
  if difflist == sorted(difflist, key=lambda x: x.timestamp):
    return difflist

  logger.warning('Commit timestamps are not increasing')
  adjusted = 0
  latest = -1
  for diff in difflist:
    if diff.timestamp < latest:
      # Clamp a timestamp going backwards to the latest one seen so far.
      diff.timestamp = latest
      adjusted += 1
    else:
      latest = diff.timestamp
  logger.warning('%d timestamps adjusted', adjusted)

  return difflist
240
241
def get_difflist_between_two_set(base_dir, old_set, new_set):
  """Collects the diffs needed to go from `old_set` to `new_set`.

  Args:
    base_dir: the project root
    old_set: mapping from repo path (relative to base_dir) to old commit
    new_set: mapping from repo path (relative to base_dir) to new commit

  Returns:
    list of Diff objects of all changed repos, sorted by (timestamp, path).
    An empty list is returned if any repo was added, removed, or its old
    commit is not an ancestor of its new commit, since those cases are not
    supported yet.
  """
  difflist = []
  for path in set(old_set) | set(new_set):
    git_repo = os.path.join(base_dir, path)

    if path not in new_set:
      # remove dependency
      # TODO(kcwu): handle removal properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was removed. Not supported yet',
                     path)
      return []

    if path not in old_set:
      assert path in new_set
      # add dependency
      # TODO(kcwu): handle addition properly (crbug.com/827092)
      logger.warning('Warning: dependency "%s" was added. Not supported yet',
                     path)
      return []

    old_rev = old_set[path]
    new_rev = new_set[path]
    if old_rev == new_rev:
      # nochange, do nothing
      continue

    if not is_ancestor_commit(git_repo, old_rev, new_rev):
      # maybe switch branch?
      # TODO(kcwu): handle discontinuous properly (crbug.com/827092)
      logger.warning(
          'Warning: dependency "%s" discontinuous. Not supported yet', path)
      return []

    # normal case
    difflist.extend(
        get_difflist_between_two_commit(base_dir, path, old_rev, new_rev))

  difflist.sort(key=lambda diff: (diff.timestamp, diff.path))

  return difflist
278 return result