blob: 80456835907fc659c045bd628f8319e77bdc8248 [file] [log] [blame]
mgiuca@chromium.org81937562016-02-03 08:00:53 +00001#!/usr/bin/env python
2# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Wrapper around git blame that ignores certain commits.
7"""
8
9from __future__ import print_function
10
11import argparse
12import collections
13import logging
14import os
15import subprocess2
16import sys
17
18import git_common
19import git_dates
mgiuca@chromium.org63906ba2016-04-29 01:43:32 +000020import setup_color
mgiuca@chromium.org81937562016-02-03 08:00:53 +000021
22
# Set the root logger's threshold to INFO, so the logging.debug() calls in
# this module are suppressed unless the level is lowered elsewhere.
logging.getLogger().setLevel(logging.INFO)


# File consulted by main() for revisions to ignore by default. main() opens
# it after changing directory to the repository root.
DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
27
28
class Commit(object):
  """Metadata about one commit, as reported in git blame headers.

  Only |commithash| is supplied at construction; every other attribute starts
  out as None. parse_blame() fills them in with setattr(), using the header
  key with '-' replaced by '_' as the attribute name.
  """
  def __init__(self, commithash):
    self.commithash = commithash
    # Pre-declare the attributes read elsewhere in this module so that they
    # exist (as None) even when the corresponding header is never seen.
    for attribute in ('author', 'author_mail', 'author_time', 'author_tz',
                      'committer', 'committer_mail', 'committer_time',
                      'committer_tz', 'summary', 'boundary', 'previous',
                      'filename'):
      setattr(self, attribute, None)

  def __repr__(self):  # pragma: no cover
    return '<Commit %s>' % self.commithash
48
49
# One line of blame output:
#   commit:      the Commit the line is attributed to.
#   context:     the text of the line.
#   lineno_then: the first line number git blame reports for the line.
#   lineno_now:  the second line number git blame reports for the line.
#   modified:    False as produced by parse_blame(); True once hyper_blame()
#                has re-attributed the line past an ignored commit.
BlameLine = collections.namedtuple(
    'BlameLine',
    ['commit', 'context', 'lineno_then', 'lineno_now', 'modified'])
53
54
def parse_blame(blameoutput):
  """Parses the output of git blame -p into a data structure.

  Args:
    blameoutput: The complete blame output, as a single string.

  Yields:
    One BlameLine per line group in the output, with |modified| set to False.
    Groups naming the same commit hash share a single Commit object, so
    header fields seen in any of them accumulate on that object.
  """
  commits = {}
  remaining = iter(blameoutput.split('\n'))

  for header in remaining:
    if not header.strip():
      continue

    # Group header: <hash> <lineno_then> <lineno_now> [<anything else>].
    fields = header.split()
    commithash = fields[0]
    lineno_then = int(fields[1])
    lineno_now = int(fields[2])

    if commithash not in commits:
      commits[commithash] = Commit(commithash)
    commit = commits[commithash]

    # Consume commit details up to and including the tab-prefixed context
    # line. If the input runs out first, the last line seen stands in for the
    # context line.
    last_seen = header
    for last_seen in remaining:
      if last_seen.startswith('\t'):
        break

      # "key value" becomes a string attribute; a bare "key" becomes True.
      parts = last_seen.split(' ', 1)
      if len(parts) == 2:
        key, value = parts
      else:
        key, value = last_seen, True
      setattr(commit, key.replace('-', '_'), value)

    # Drop the leading tab.
    context = last_seen[1:]

    yield BlameLine(commit, context, lineno_then, lineno_now, False)
95
96
def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout):
  """Print a 2D rectangular array, aligning columns with spaces.

  Args:
    table: Sequence of rows, each a sequence of strings. Nothing is printed
        when it is empty.
    colsep: String written between adjacent cells of a row.
    rowsep: String written after each row.
    align: Optional string of 'l' and 'r', designating whether each column is
        left- or right-aligned. Defaults to left aligned.
    out: File-like object the table is written to.
  """
  if len(table) == 0:
    return

  # Widest cell per column: start from the first row, then fold in the rest.
  rows = iter(table)
  widths = [len(cell) for cell in next(rows)]
  for row in rows:
    widths = [max(widths[col], len(cell)) for col, cell in enumerate(row)]

  if align is None:  # pragma: no cover
    align = 'l' * len(widths)

  for row in table:
    last_col = len(row) - 1
    rendered = []
    for col, cell in enumerate(row):
      gap = ' ' * (widths[col] - len(cell))
      if align[col] == 'r':
        rendered.append(gap + cell)
      elif col == last_col:
        # Do not pad the final column if left-aligned.
        rendered.append(cell)
      else:
        rendered.append(cell + gap)
    try:
      print(*rendered, sep=colsep, end=rowsep, file=out)
    except IOError:  # pragma: no cover
      # Can happen on Windows if the pipe is closed early.
      pass
132
133
def pretty_print(parsedblame, show_filenames=False, out=sys.stdout):
  """Pretty-prints the output of parse_blame.

  Args:
    parsedblame: Iterable of BlameLine objects.
    show_filenames: If true, adds a column holding each commit's filename
        directly after the abbreviated hash.
    out: File-like object the table is written to.
  """
  rows = []
  for entry in parsedblame:
    commit = entry.commit
    when = git_dates.timestamp_offset_to_datetime(
        commit.author_time, commit.author_tz)
    # A '*' after the line number flags a line whose attribution was changed.
    marker = '*' if entry.modified else ''
    cells = [
        commit.commithash[:8],
        '(' + commit.author,
        git_dates.datetime_string(when),
        str(entry.lineno_now) + marker + ')',
        entry.context,
    ]
    if show_filenames:
      cells[1:1] = [commit.filename]
    rows.append(cells)

  # The line-number column is the only right-aligned one.
  alignment = 'llllrl' if show_filenames else 'lllrl'
  print_table(rows, align=alignment, out=out)
149
150
def get_parsed_blame(filename, revision='HEAD'):
  """Blames |filename| at |revision| and returns the parsed result.

  Args:
    filename: The file to blame.
    revision: The revision to blame at.

  Returns:
    A list of BlameLine objects, as produced by parse_blame().
  """
  porcelain = git_common.blame(filename, revision=revision, porcelain=True)
  blame_lines = parse_blame(porcelain)
  return list(blame_lines)
154
155
# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
# only the hunk line numbers, not the actual diff contents).
# A hunk list contains (old, new) pairs, where old and new are themselves
# (start, length) pairs of integers.
diff_hunks_cache = {}


def cache_diff_hunks(oldrev, newrev):
  """Returns the hunk list for the diff from |oldrev| to |newrev|.

  The diff is run at most once per (oldrev, newrev) pair; later calls return
  the list stored in diff_hunks_cache.

  Args:
    oldrev: Old side of the diff, passed to git_common.diff unchanged.
    newrev: New side of the diff, passed to git_common.diff unchanged.

  Returns:
    A list of ((oldstart, oldlength), (newstart, newlength)) tuples, one per
    '@@' hunk header in the diff output, in output order.
  """
  cache_key = (oldrev, newrev)
  if cache_key in diff_hunks_cache:
    return diff_hunks_cache[cache_key]

  def to_start_and_length(token):
    # Chop the '-' or '+'.
    body = token[1:]
    pieces = body.split(',')
    if len(pieces) == 2:
      start, length = pieces
    else:
      # Length is optional (defaults to 1).
      start, length = body, 1
    return int(start), int(length)

  # Use -U0 to get the smallest possible hunks.
  diff = git_common.diff(oldrev, newrev, '-U0')

  # Only the '@@ -old +new @@' header lines matter; the second and third
  # space-separated tokens of each are the old and new ranges.
  headers = [text for text in diff.split('\n') if text.startswith('@@')]
  hunks = [
      tuple(to_start_and_length(token)
            for token in header.split(' ', 3)[1:3])
      for header in headers
  ]

  diff_hunks_cache[cache_key] = hunks
  return hunks
194
195
def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
  """Computes the approximate movement of a line number between two revisions.

  Consider line |lineno| in |filename| at |revision|. This function computes the
  line number of that line in |newfilename| at |newrevision|. This is
  necessarily approximate.

  Args:
    filename: The file (within the repo) at |revision|.
    newfilename: The name of the same file at |newrevision|.
    revision: A git revision.
    newrevision: Another git revision. Note: Can be ahead or behind |revision|.
    lineno: Line number within |filename| at |revision|.

  Returns:
    Line number within |newfilename| at |newrevision|.
  """
  # This doesn't work that well if there are a lot of line changes within the
  # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  # A fuzzy heuristic that takes the text of the new line and tries to find a
  # deleted line within the hunk that mostly matches the new line could help.

  # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  # only way to diff a file that has been renamed.
  old = '%s:%s' % (revision, filename)
  new = '%s:%s' % (newrevision, newfilename)
  hunks = cache_diff_hunks(old, new)

  # Net number of lines added by the hunks that lie entirely before lineno.
  cumulative_offset = 0

  # Find the hunk containing lineno (if any). Hunks are assumed to be in
  # increasing order of position, as they appear in the diff output.
  for (oldstart, oldlength), (newstart, newlength) in hunks:
    if oldlength == 0:
      # Pure insertion: no old line is inside this hunk, and oldstart is the
      # old line *after which* the new lines were inserted. Only lines
      # strictly below oldstart are pushed down by it.
      if lineno <= oldstart:
        # Gone too far.
        break
      cumulative_offset += newlength
      continue

    if lineno < oldstart:
      # Gone too far. This hunk (and every later one) is below lineno, so it
      # must not contribute to the offset.
      break

    if lineno >= oldstart + oldlength:
      # Not there yet. The whole hunk is above lineno, so its change in length
      # shifts lineno.
      cumulative_offset += newlength - oldlength
      continue

    # lineno is in [oldstart, oldstart + oldlength) at revision, which became
    # [newstart, newstart + newlength) at newrevision.

    # If newlength == 0, newstart will be the line before the deleted hunk.
    # Since the line must have been deleted, just return that as the nearest
    # line in the new file. Caution: newstart can be 0 in this case.
    if newlength == 0:
      return max(1, newstart)

    newend = newstart + newlength - 1

    # Move lineno based on the amount the entire hunk shifted.
    lineno = lineno + newstart - oldstart
    # Constrain the output within the range [newstart, newend].
    return min(newend, max(newstart, lineno))

  # Wasn't in a hunk. Figure out the line motion based on the difference in
  # length of the hunks that precede it.
  return lineno + cumulative_offset
258
259
def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout,
                err=sys.stderr):
  """Blames |filename| at |revision|, looking past the commits in |ignored|.

  A line attributed to an ignored commit is re-attributed to whichever commit
  is blamed for the approximately corresponding line in that commit's
  predecessor. This repeats until a non-ignored commit is reached, or until
  the commit cannot be skipped (it has no predecessor, or the file was empty
  there). The result is written to |out| with pretty_print().

  Args:
    ignored: Collection of commit hashes to skip. Membership is tested against
        the hashes reported by git blame.
    filename: The file to blame. Also compared with the filename git reports
        for each line, to decide whether a filename column is needed.
    revision: The revision to start blaming from.
    out: File-like object that receives the blame table.
    err: File-like object that receives git's error output if the initial
        blame fails.

  Returns:
    0 on success; otherwise the return code of the failed initial blame.
  """
  # Map from (commit, filename) to parsed blame of that file at that commit.
  # The filename is part of the key because the blame depends on both.
  blame_from = {}

  def cache_blame_from(filename, commithash):
    key = (commithash, filename)
    try:
      return blame_from[key]
    except KeyError:
      parsed = get_parsed_blame(filename, commithash)
      blame_from[key] = parsed
      return parsed

  try:
    parsed = cache_blame_from(filename, git_common.hash_one(revision))
  except subprocess2.CalledProcessError as e:
    err.write(e.stderr)
    return e.returncode

  new_parsed = []

  # We don't show filenames in blame output unless we have to.
  show_filenames = False

  for line in parsed:
    # If a line references an ignored commit, blame that commit's parent
    # repeatedly until we find a non-ignored commit.
    while line.commit.commithash in ignored:
      if line.commit.previous is None:
        # You can't ignore the commit that added this file.
        break

      # The 'previous' header holds "<commit> <filename>".
      previouscommit, previousfilename = line.commit.previous.split(' ', 1)
      parent_blame = cache_blame_from(previousfilename, previouscommit)

      if len(parent_blame) == 0:
        # The previous version of this file was empty, therefore, you can't
        # ignore this commit.
        break

      # line.lineno_then is the line number in question at line.commit. We need
      # to translate that line number so that it refers to the position of the
      # same line on previouscommit.
      lineno_previous = approx_lineno_across_revs(
          line.commit.filename, previousfilename, line.commit.commithash,
          previouscommit, line.lineno_then)
      logging.debug('ignore commit %s on line p%d/t%d/n%d',
                    line.commit.commithash, lineno_previous, line.lineno_then,
                    line.lineno_now)

      # Get the line at lineno_previous in the parent commit.
      assert 1 <= lineno_previous <= len(parent_blame)
      newline = parent_blame[lineno_previous - 1]

      # Replace the commit and lineno_then, but not the lineno_now or context.
      # lineno_then must be the line's position at newline.commit, which is
      # newline.lineno_then. lineno_previous is its position at
      # previouscommit, and the two differ whenever commits between
      # newline.commit and previouscommit moved the line. Using the wrong one
      # would mistranslate the line if newline.commit is ignored as well.
      logging.debug(' replacing with %r', newline)
      line = BlameLine(newline.commit, line.context, newline.lineno_then,
                       line.lineno_now, True)

    # If any line has a different filename to the file's current name, turn on
    # filename display for the entire blame output.
    if line.commit.filename != filename:
      show_filenames = True

    new_parsed.append(line)

  pretty_print(new_parsed, show_filenames=show_filenames, out=out)

  return 0
329
mgiuca@chromium.orgcd0a1cf2016-02-22 00:40:33 +0000330
def parse_ignore_file(ignore_file):
  """Yields the entries listed in an ignore file.

  Everything from the first '#' on a line is treated as a comment. Lines that
  are blank once the comment and surrounding whitespace are removed are
  skipped.

  Args:
    ignore_file: Iterable of text lines, such as an open file.

  Yields:
    Each remaining entry, stripped of surrounding whitespace.
  """
  for raw_line in ignore_file:
    before_comment = raw_line.split('#', 1)[0]
    entry = before_comment.strip()
    if not entry:
      continue
    yield entry
336
337
def main(args, stdout=sys.stdout, stderr=sys.stderr):
  """Command-line entry point for git hyper-blame.

  Collects the revisions to ignore from -i, from the default ignore file at
  the repository root (unless --no-default-ignores is given) and from
  --ignore-file, resolves each to a commit hash, and runs hyper_blame().

  Note: changes the process's working directory to the repository root.

  Args:
    args: List of command-line arguments, not including the program name.
    stdout: File-like object that receives the blame output.
    stderr: File-like object that receives errors and warnings.

  Returns:
    The value returned by hyper_blame(), or the return code of the git command
    that failed while locating the repository root.
  """
  parser = argparse.ArgumentParser(
      prog='git hyper-blame',
      description='git blame with support for ignoring certain commits.')
  parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
                      default=[], help='a revision to ignore')
  parser.add_argument('--ignore-file', metavar='FILE',
                      type=argparse.FileType('r'), dest='ignore_file',
                      help='a file containing a list of revisions to ignore')
  # store_true makes this a real on/off flag. Without an action, argparse
  # would require the option to be followed by a value.
  parser.add_argument('--no-default-ignores', dest='no_default_ignores',
                      action='store_true',
                      help='Do not ignore commits from .git-blame-ignore-revs.')
  parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
                      help='revision to look at')
  parser.add_argument('filename', metavar='FILE', help='filename to blame')

  args = parser.parse_args(args)
  try:
    repo_root = git_common.repo_root()
  except subprocess2.CalledProcessError as e:
    stderr.write(e.stderr)
    return e.returncode

  # Make filename relative to the repository root, and cd to the root dir (so
  # all filenames throughout this script are relative to the root).
  filename = os.path.relpath(args.filename, repo_root)
  os.chdir(repo_root)

  # Normalize filename so we can compare it to other filenames git gives us.
  filename = os.path.normpath(filename)
  filename = os.path.normcase(filename)

  ignored_list = list(args.ignored)
  if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
    with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
      ignored_list.extend(parse_ignore_file(ignore_file))

  if args.ignore_file:
    # argparse opened this file for us; close it once it has been read.
    with args.ignore_file as ignore_file:
      ignored_list.extend(parse_ignore_file(ignore_file))

  # Resolve every revision to a full hash, dropping the ones git can't resolve.
  ignored = set()
  for c in ignored_list:
    try:
      ignored.add(git_common.hash_one(c))
    except subprocess2.CalledProcessError:
      # Custom warning string (the message from git-rev-parse is inappropriate).
      stderr.write('warning: unknown revision \'%s\'.\n' % c)

  return hyper_blame(ignored, filename, args.revision, out=stdout, err=stderr)
386
387
if __name__ == '__main__':  # pragma: no cover
  setup_color.init()
  # Send the blame output to the stream provided by git_common.less()
  # (presumably a pager -- confirm against git_common). Exiting inside the
  # with block matches the original ordering of exit and context teardown.
  with git_common.less() as pager_input:
    exit_code = main(sys.argv[1:], stdout=pager_input)
    sys.exit(exit_code)