blob: 5a7daa0cf56b2db00fb5c98b391f7572fa62ed0c [file] [log] [blame]
mgiuca@chromium.org81937562016-02-03 08:00:53 +00001#!/usr/bin/env python
2# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Wrapper around git blame that ignores certain commits.
7"""
8
9from __future__ import print_function
10
11import argparse
12import collections
13import logging
14import os
15import subprocess2
16import sys
17
18import git_common
19import git_dates
20
21
# Set the root logger's threshold to INFO. The logging.debug() calls made in
# hyper_blame are below this threshold, so they are dropped unless the level
# is lowered.
logging.getLogger().setLevel(logging.INFO)
23
24
class Commit(object):
  """Record of what a blame run reported about a single commit.

  Only |commithash| is known at construction time. Every other attribute
  starts out as None and is filled in later by parse_blame, which sets
  attributes by name from the header lines of the blame output.
  """

  # Attributes that exist on every instance but are unset until parsed.
  _UNSET_FIELDS = (
      'author',
      'author_mail',
      'author_time',
      'author_tz',
      'committer',
      'committer_mail',
      'committer_time',
      'committer_tz',
      'summary',
      'boundary',
      'previous',
      'filename',
  )

  def __init__(self, commithash):
    self.commithash = commithash
    for field in self._UNSET_FIELDS:
      setattr(self, field, None)

  def __repr__(self):  # pragma: no cover
    return '<Commit %s>' % self.commithash
44
45
# One line of blame output:
#   commit:      the Commit object the line is attributed to.
#   context:     the text of the line.
#   lineno_then: line number parsed from the second field of the blame header.
#   lineno_now:  line number parsed from the third field of the blame header.
#   modified:    False as produced by parse_blame; hyper_blame sets it to True
#                on lines it re-attributes to a different commit.
BlameLine = collections.namedtuple(
    'BlameLine',
    ['commit', 'context', 'lineno_then', 'lineno_now', 'modified'])
49
50
def parse_blame(blameoutput):
  """Parses the output of git blame -p into a data structure.

  Args:
    blameoutput: The complete blame text as a single string.

  Yields:
    One BlameLine per blamed line, with |modified| set to False. All lines
    attributed to the same commit hash share a single Commit object.
  """
  commits = {}
  remaining = iter(blameoutput.split('\n'))

  for header in remaining:
    # Blank lines between entries carry no information.
    if not header.strip():
      continue

    # Entry header: <hash> <lineno_then> <lineno_now> [...]
    fields = header.split()
    commithash = fields[0]
    lineno_then = int(fields[1])
    lineno_now = int(fields[2])

    commit = commits.get(commithash)
    if commit is None:
      commit = commits[commithash] = Commit(commithash)

    # Consume "key value" detail lines until the tab-prefixed context line.
    # |line| starts as the header so that, if the input ends before a context
    # line is seen, the context is derived from the last line read.
    line = header
    for line in remaining:
      if line.startswith('\t'):
        break
      key, sep, value = line.partition(' ')
      # A key with no value (no space on the line) is recorded as True.
      setattr(commit, key.replace('-', '_'), value if sep else True)

    # Drop the leading tab from the context line.
    yield BlameLine(commit, line[1:], lineno_then, lineno_now, False)
91
92
def print_table(table, colsep=' ', rowsep='\n', align=None, out=None):
  """Print a 2D rectangular array, aligning columns with spaces.

  Args:
    table: Sequence of rows. Each row is a sequence of strings, and all rows
        must have the same number of cells.
    colsep: String written between adjacent cells of a row.
    rowsep: String written after every row.
    align: Optional string of 'l' and 'r', designating whether each column is
        left- or right-aligned. Defaults to left aligned.
    out: File-like object to print to. Defaults to sys.stdout, looked up when
        the function is called (not when it is defined), so redirecting
        sys.stdout after import is honoured.
  """
  if out is None:
    out = sys.stdout

  if not table:
    return

  # Width of each column is the length of its longest cell.
  rows = iter(table)
  colwidths = [len(cell) for cell in next(rows)]
  for row in rows:
    colwidths = [max(colwidths[i], len(cell)) for i, cell in enumerate(row)]

  if align is None:  # pragma: no cover
    align = 'l' * len(colwidths)

  for row in table:
    last = len(row) - 1
    cells = []
    for i, cell in enumerate(row):
      padding = ' ' * (colwidths[i] - len(cell))
      if align[i] == 'r':
        cell = padding + cell
      elif i < last:
        # Do not pad the final column if left-aligned.
        cell += padding
      cells.append(cell)
    try:
      print(*cells, sep=colsep, end=rowsep, file=out)
    except IOError:  # pragma: no cover
      # Can happen on Windows if the pipe is closed early.
      pass
128
129
def pretty_print(parsedblame, show_filenames=False, out=None):
  """Pretty-prints the output of parse_blame.

  Each blame line becomes one table row: abbreviated commit hash (first 8
  characters), optionally the commit's filename, the author, the author date,
  the current line number (suffixed with '*' when the line is marked
  modified), and the line's text.

  Args:
    parsedblame: Iterable of BlameLine objects.
    show_filenames: If True, insert each commit's filename as the second
        column.
    out: File-like object to print to. Defaults to sys.stdout, looked up when
        the function is called rather than when it is defined.
  """
  if out is None:
    out = sys.stdout

  table = []
  for line in parsedblame:
    commit = line.commit
    # author_time/author_tz are the raw strings parsed from the blame output;
    # presumably a timestamp and a UTC offset - confirm against git_dates.
    author_time = git_dates.timestamp_offset_to_datetime(
        commit.author_time, commit.author_tz)
    row = [commit.commithash[:8],
           '(' + commit.author,
           git_dates.datetime_string(author_time),
           str(line.lineno_now) + ('*' if line.modified else '') + ')',
           line.context]
    if show_filenames:
      row.insert(1, commit.filename)
    table.append(row)

  # The line-number column is right-aligned; every other column is left-aligned.
  print_table(table, align='llllrl' if show_filenames else 'lllrl', out=out)
145
146
def get_parsed_blame(filename, revision='HEAD'):
  """Blames |filename| at |revision| and returns the parsed result.

  Args:
    filename: File to blame, passed through to git_common.blame.
    revision: Revision to blame at. Defaults to 'HEAD'.

  Returns:
    A list of BlameLine objects, as produced by parse_blame.
  """
  porcelain = git_common.blame(filename, revision=revision, porcelain=True)
  blame_lines = parse_blame(porcelain)
  return list(blame_lines)
150
151
# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
# only the hunk line numbers, not the actual diff contents).
# A hunk list contains (old, new) pairs, where old and new are (start, length)
# pairs of ints parsed from each '@@' hunk header by cache_diff_hunks.
# NOTE(review): this comment previously said a hunk list can also be None (if
# the diff failed), but cache_diff_hunks never stores None here; confirm
# whether that case was meant to be handled.
diff_hunks_cache = {}
157
158
def cache_diff_hunks(oldrev, newrev):
  """Returns the hunk list for the diff from |oldrev| to |newrev|, memoized.

  The result is stored in diff_hunks_cache under the key (oldrev, newrev), so
  the diff is only run once per pair.

  Args:
    oldrev: Old side of the diff, passed through to git_common.diff.
    newrev: New side of the diff, passed through to git_common.diff.

  Returns:
    A list of ((oldstart, oldlength), (newstart, newlength)) tuples of ints,
    one per '@@' hunk header in the diff output, in output order.
  """
  def parse_range(token):
    # |token| looks like '-12,3' or '+12'. Drop the sign; the length after the
    # comma is optional and defaults to 1.
    start, comma, length = token[1:].partition(',')
    if not comma:
      length = 1
    return int(start), int(length)

  key = (oldrev, newrev)
  if key in diff_hunks_cache:
    return diff_hunks_cache[key]

  # Use -U0 to get the smallest possible hunks.
  diff = git_common.diff(oldrev, newrev, '-U0')

  # A hunk header reads '@@ <old range> <new range> @@ ...'; keep only the two
  # ranges from each header and ignore every other line of the diff.
  hunks = [
      tuple(parse_range(token) for token in line.split(' ', 3)[1:3])
      for line in diff.split('\n')
      if line.startswith('@@')
  ]

  diff_hunks_cache[key] = hunks
  return hunks
190
191
def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
  """Computes the approximate movement of a line number between two revisions.

  Consider line |lineno| in |filename| at |revision|. This function computes the
  line number of that line in |newfilename| at |newrevision|. This is
  necessarily approximate.

  Args:
    filename: The file (within the repo) at |revision|.
    newfilename: The name of the same file at |newrevision|.
    revision: A git revision.
    newrevision: Another git revision. Note: Can be ahead or behind |revision|.
    lineno: Line number within |filename| at |revision|.

  Returns:
    Line number within |newfilename| at |newrevision|.
  """
  # This doesn't work that well if there are a lot of line changes within the
  # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  # A fuzzy heuristic that takes the text of the new line and tries to find a
  # deleted line within the hunk that mostly matches the new line could help.

  # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  # only way to diff a file that has been renamed.
  old = '%s:%s' % (revision, filename)
  new = '%s:%s' % (newrevision, newfilename)
  hunks = cache_diff_hunks(old, new)

  # Net number of lines added by the hunks that lie entirely before lineno.
  cumulative_offset = 0

  # Find the hunk containing lineno (if any). Hunks are visited in the order
  # the diff reported them, which is assumed to be ascending by line number.
  for (oldstart, oldlength), (newstart, newlength) in hunks:
    if oldlength == 0:
      # Pure insertion: the new lines go in *after* old line |oldstart|, so
      # |oldstart| itself and everything before it are unaffected.
      if lineno <= oldstart:
        # Gone too far.
        break
      cumulative_offset += newlength
      continue

    if lineno < oldstart:
      # Gone too far. This hunk lies after lineno, so it must not contribute
      # to the offset.
      break

    if lineno >= oldstart + oldlength:
      # Not there yet. The whole hunk precedes lineno, so lineno shifts by the
      # hunk's change in length.
      cumulative_offset += newlength - oldlength
      continue

    # lineno is in [oldstart, oldstart + oldlength) at revision; the hunk
    # occupies [newstart, newstart + newlength) at newrevision.

    # If newlength == 0, newstart will be the line before the deleted hunk.
    # Since the line must have been deleted, just return that as the nearest
    # line in the new file. Caution: newstart can be 0 in this case.
    if newlength == 0:
      return max(1, newstart)

    newend = newstart + newlength - 1

    # Move lineno based on the amount the entire hunk shifted.
    lineno = lineno + newstart - oldstart
    # Constrain the output within the range [newstart, newend].
    return min(newend, max(newstart, lineno))

  # Wasn't in a hunk. Figure out the line motion based on the difference in
  # length between the hunks that precede lineno.
  return lineno + cumulative_offset
254
255
def hyper_blame(ignored, filename, revision='HEAD', out=None, err=None):
  """Blames |filename| at |revision|, looking through the |ignored| commits.

  Any line attributed to a commit in |ignored| is re-attributed to whichever
  commit the corresponding line is blamed on in that commit's parent, repeating
  until a non-ignored commit is reached. Re-attributed lines are flagged as
  modified. The result is pretty-printed to |out|.

  Args:
    ignored: Container of full commit hashes to skip (tested with `in`).
    filename: File to blame; compared against the filenames reported in the
        blame output to decide whether filenames need to be shown.
    revision: Revision to blame at. Defaults to 'HEAD'.
    out: File-like object for the blame output. Defaults to sys.stdout, looked
        up at call time.
    err: File-like object for error text. Defaults to sys.stderr, looked up at
        call time.

  Returns:
    0 on success. If the initial blame fails with
    subprocess2.CalledProcessError, its stderr is written to |err| and its
    return code is returned.
  """
  if out is None:
    out = sys.stdout
  if err is None:
    err = sys.stderr

  # Map from (filename, commit) to parsed blame of that file at that commit.
  blame_from = {}

  def cache_blame_from(filename, commithash):
    # The filename is part of the key because a blame result depends on which
    # path was blamed, not just on the commit.
    key = (filename, commithash)
    try:
      return blame_from[key]
    except KeyError:
      parsed = get_parsed_blame(filename, commithash)
      blame_from[key] = parsed
      return parsed

  try:
    parsed = cache_blame_from(filename, git_common.hash_one(revision))
  except subprocess2.CalledProcessError as e:
    err.write(e.stderr)
    return e.returncode

  new_parsed = []

  # We don't show filenames in blame output unless we have to.
  show_filenames = False

  for line in parsed:
    # If a line references an ignored commit, blame that commit's parent
    # repeatedly until we find a non-ignored commit.
    while line.commit.commithash in ignored:
      if line.commit.previous is None:
        # You can't ignore the commit that added this file.
        break

      # |previous| holds '<parent hash> <filename in parent>'.
      previouscommit, previousfilename = line.commit.previous.split(' ', 1)
      parent_blame = cache_blame_from(previousfilename, previouscommit)

      if len(parent_blame) == 0:
        # The previous version of this file was empty, therefore, you can't
        # ignore this commit.
        break

      # line.lineno_then is the line number in question at line.commit. We need
      # to translate that line number so that it refers to the position of the
      # same line on previouscommit.
      lineno_previous = approx_lineno_across_revs(
          line.commit.filename, previousfilename, line.commit.commithash,
          previouscommit, line.lineno_then)
      logging.debug('ignore commit %s on line p%d/t%d/n%d',
                    line.commit.commithash, lineno_previous, line.lineno_then,
                    line.lineno_now)

      # Get the line at lineno_previous in the parent commit.
      assert 1 <= lineno_previous <= len(parent_blame)
      newline = parent_blame[lineno_previous - 1]

      # Replace the commit and lineno_then, but not the lineno_now or context.
      # lineno_then must be the line's position at newline.commit (which may
      # be older than previouscommit), not its position at previouscommit:
      # if newline.commit is itself ignored, the next iteration diffs from
      # newline.commit and needs a line number valid in that revision.
      logging.debug(' replacing with %r', newline)
      line = BlameLine(newline.commit, line.context, newline.lineno_then,
                       line.lineno_now, True)

    # If any line has a different filename to the file's current name, turn on
    # filename display for the entire blame output.
    if line.commit.filename != filename:
      show_filenames = True

    new_parsed.append(line)

  pretty_print(new_parsed, show_filenames=show_filenames, out=out)

  return 0
325
def main(args, stdout=None, stderr=None):
  """Command-line entry point for git hyper-blame.

  Args:
    args: List of command-line arguments, excluding the program name.
    stdout: File-like object for the blame output. Defaults to sys.stdout,
        looked up at call time.
    stderr: File-like object for error messages. Defaults to sys.stderr,
        looked up at call time.

  Returns:
    The process exit code: the result of hyper_blame, or the return code of
    the failed git command if the repository root or an ignored revision could
    not be resolved.
  """
  if stdout is None:
    stdout = sys.stdout
  if stderr is None:
    stderr = sys.stderr

  parser = argparse.ArgumentParser(
      prog='git hyper-blame',
      description='git blame with support for ignoring certain commits.')
  parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
                      default=[], help='a revision to ignore')
  parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
                      help='revision to look at')
  parser.add_argument('filename', metavar='FILE', help='filename to blame')

  opts = parser.parse_args(args)
  try:
    repo_root = git_common.repo_root()
  except subprocess2.CalledProcessError as e:
    stderr.write(e.stderr)
    return e.returncode

  # Make filename relative to the repository root, and cd to the root dir (so
  # all filenames throughout this script are relative to the root).
  filename = os.path.relpath(opts.filename, repo_root)
  os.chdir(repo_root)

  # Normalize filename so we can compare it to other filenames git gives us.
  filename = os.path.normpath(filename)
  filename = os.path.normcase(filename)

  # Resolve every -i argument to a commit hash so it can be compared against
  # the hashes that appear in the blame output.
  ignored = set()
  for c in opts.ignored:
    try:
      ignored.add(git_common.hash_one(c))
    except subprocess2.CalledProcessError as e:
      # Custom error message (the message from git-rev-parse is inappropriate).
      stderr.write('fatal: unknown revision \'%s\'.\n' % c)
      return e.returncode

  return hyper_blame(ignored, filename, opts.revision, out=stdout, err=stderr)
362
363
if __name__ == '__main__':  # pragma: no cover
  # Run main() with its stdout set to the object yielded by git_common.less()
  # (presumably the input of a pager - confirm in git_common), then exit with
  # main()'s return code.
  with git_common.less() as less_input:
    sys.exit(main(sys.argv[1:], stdout=less_input))