blob: b9965a7a69e7c606097e93632d03f6d64c38023e [file] [log] [blame]
mgiuca@chromium.org81937562016-02-03 08:00:53 +00001#!/usr/bin/env python
2# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Wrapper around git blame that ignores certain commits.
7"""
8
9from __future__ import print_function
10
11import argparse
12import collections
13import logging
14import os
15import subprocess2
16import sys
17
18import git_common
19import git_dates
20
21
# Set the root logger's threshold to INFO, so the logging.debug() calls made in
# this module are not emitted unless the level is lowered elsewhere.
logging.getLogger().setLevel(logging.INFO)
23
24
# Name of the ignore-list file that main() looks for (relative to the repository
# root, after it has changed directory there) unless --no-default-ignores is
# given.
DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
26
27
class Commit(object):
  """Metadata record for one commit seen in blame output.

  Only |commithash| is known at construction time. Every other field starts
  as None and is filled in later by parse_blame(), which copies the header
  fields of `git blame -p` output onto the instance with setattr().
  """

  # Fields that are initialised to None, in the order they are created.
  _UNSET_FIELDS = (
      'author',
      'author_mail',
      'author_time',
      'author_tz',
      'committer',
      'committer_mail',
      'committer_time',
      'committer_tz',
      'summary',
      'boundary',
      'previous',
      'filename',
  )

  def __init__(self, commithash):
    self.commithash = commithash
    for field in self._UNSET_FIELDS:
      setattr(self, field, None)

  def __repr__(self):  # pragma: no cover
    return '<Commit %s>' % self.commithash
47
48
# One line of blame output:
#   commit:      the Commit the line is attributed to.
#   context:     the text of the line.
#   lineno_then: line number of the line in the file as of |commit|.
#   lineno_now:  line number of the line in the file being blamed.
#   modified:    True if hyper_blame() re-attributed the line past an ignored
#                commit.
BlameLine = collections.namedtuple(
    'BlameLine',
    ['commit', 'context', 'lineno_then', 'lineno_now', 'modified'])
52
53
def parse_blame(blameoutput):
  """Yields a BlameLine for each blamed line in `git blame -p` output.

  Each entry starts with a header line "<hash> <lineno_then> <lineno_now> ...",
  followed by zero or more "key value" detail lines, and ends with the line's
  text prefixed by a tab. Detail lines are stored as attributes on a Commit
  object that is shared between all entries with the same hash, so details
  given once for a commit are visible from every line attributed to it.

  Args:
    blameoutput: The complete porcelain blame output, as a single string.

  Yields:
    BlameLine tuples with |modified| set to False.
  """
  known_commits = {}
  remaining = iter(blameoutput.split('\n'))

  # The outer and inner loops consume the same iterator, so each pass of the
  # outer loop starts at the header of the next entry.
  for header in remaining:
    if not header.strip():
      continue

    fields = header.split()
    commithash = fields[0]
    lineno_then = int(fields[1])
    lineno_now = int(fields[2])

    commit = known_commits.get(commithash)
    if commit is None:
      commit = Commit(commithash)
      known_commits[commithash] = commit

    # Consume detail lines up to and including the tab-prefixed context line.
    # If the input runs out first, the last line read stands in for it.
    current = header
    for current in remaining:
      if current.startswith('\t'):
        break
      key, sep, value = current.partition(' ')
      # A key with no value (e.g. "boundary") is recorded as a flag.
      setattr(commit, key.replace('-', '_'), value if sep else True)

    # Drop the leading tab.
    yield BlameLine(commit, current[1:], lineno_then, lineno_now, False)
94
95
def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout):
  """Writes a rectangular 2D array of strings as space-aligned columns.

  Args:
    table: Sequence of rows, each a sequence of strings of equal length.
    colsep: String written between adjacent cells.
    rowsep: String written after each row.
    align: Optional string of 'l' and 'r', one character per column, choosing
        left or right alignment. Defaults to left-aligning every column.
    out: File-like object to write to.
  """
  if not len(table):
    return

  # Width of each column is the length of its longest cell.
  rows = iter(table)
  widths = [len(cell) for cell in next(rows)]
  for row in rows:
    widths = [max(widths[col], len(cell)) for col, cell in enumerate(row)]

  if align is None:  # pragma: no cover
    align = 'l' * len(widths)

  for row in table:
    last_col = len(row) - 1
    rendered = []
    for col, cell in enumerate(row):
      fill = ' ' * (widths[col] - len(cell))
      if align[col] == 'r':
        rendered.append(fill + cell)
      elif col < last_col:
        rendered.append(cell + fill)
      else:
        # A left-aligned final column gets no trailing padding.
        rendered.append(cell)
    try:
      print(*rendered, sep=colsep, end=rowsep, file=out)
    except IOError:  # pragma: no cover
      # Can happen on Windows if the pipe is closed early.
      pass
131
132
def pretty_print(parsedblame, show_filenames=False, out=sys.stdout):
  """Prints BlameLine entries as an aligned table, one row per line.

  Each row holds the first 8 characters of the commit hash, optionally the
  commit's filename, the author, the author date, the current line number
  (followed by '*' when the entry is marked as modified) and the line text.

  Args:
    parsedblame: Iterable of BlameLine, e.g. the result of parse_blame().
    show_filenames: If True, add a filename column after the hash.
    out: File-like object to write to.
  """
  rows = []
  for entry in parsedblame:
    commit = entry.commit
    when = git_dates.timestamp_offset_to_datetime(
        commit.author_time, commit.author_tz)
    marker = '*' if entry.modified else ''

    cells = [commit.commithash[:8]]
    if show_filenames:
      cells.append(commit.filename)
    cells.extend([
        '(' + commit.author,
        git_dates.datetime_string(when),
        str(entry.lineno_now) + marker + ')',
        entry.context,
    ])
    rows.append(cells)

  # The line-number column is the only right-aligned one.
  alignment = 'llllrl' if show_filenames else 'lllrl'
  print_table(rows, align=alignment, out=out)
148
149
def get_parsed_blame(filename, revision='HEAD'):
  """Returns the blame of |filename| at |revision| as a list of BlameLine.

  Asks git_common.blame() for porcelain output and parses it eagerly with
  parse_blame().
  """
  porcelain = git_common.blame(filename, revision=revision, porcelain=True)
  return [entry for entry in parse_blame(porcelain)]
153
154
# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
# only the hunk line numbers, not the actual diff contents).
# A hunk list contains (old, new) pairs, where old and new are (start, length)
# pairs. The cache is filled by cache_diff_hunks().
# NOTE(review): this comment previously said a hunk list can also be None (if
# the diff failed), but nothing visible in this file stores None here — confirm
# before relying on it.
diff_hunks_cache = {}
160
161
def cache_diff_hunks(oldrev, newrev):
  """Returns the hunk ranges of the diff from |oldrev| to |newrev|, memoized.

  Args:
    oldrev: Old side of the diff, in any form git_common.diff() accepts.
    newrev: New side of the diff.

  Returns:
    A list of ((oldstart, oldlength), (newstart, newlength)) tuples, one per
    "@@" hunk header, in the order they appear in the diff. The result is
    stored in diff_hunks_cache and the same list is returned by later calls.
  """
  cache_key = (oldrev, newrev)
  if cache_key in diff_hunks_cache:
    return diff_hunks_cache[cache_key]

  def to_range(token):
    # |token| looks like "-12,3" or "+7"; drop the sign, and treat a missing
    # length as 1.
    start, comma, length = token[1:].partition(',')
    return int(start), int(length) if comma else 1

  # Use -U0 to get the smallest possible hunks.
  diff_text = git_common.diff(oldrev, newrev, '-U0')

  # A header is "@@ -a,b +c,d @@ ..."; the two ranges are fields 1 and 2.
  hunks = [
      tuple(to_range(token) for token in header.split(' ', 3)[1:3])
      for header in diff_text.split('\n')
      if header.startswith('@@')
  ]

  diff_hunks_cache[cache_key] = hunks
  return hunks
193
194
def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
  """Computes the approximate movement of a line number between two revisions.

  Consider line |lineno| in |filename| at |revision|. This function computes the
  line number of that line in |newfilename| at |newrevision|. This is
  necessarily approximate.

  Args:
    filename: The file (within the repo) at |revision|.
    newfilename: The name of the same file at |newrevision|.
    revision: A git revision.
    newrevision: Another git revision. Note: Can be ahead or behind |revision|.
    lineno: Line number within |filename| at |revision|.

  Returns:
    Line number within |newfilename| at |newrevision|.
  """
  # This doesn't work that well if there are a lot of line changes within the
  # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  # A fuzzy heuristic that takes the text of the new line and tries to find a
  # deleted line within the hunk that mostly matches the new line could help.

  # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  # only way to diff a file that has been renamed.
  old = '%s:%s' % (revision, filename)
  new = '%s:%s' % (newrevision, newfilename)
  hunks = cache_diff_hunks(old, new)

  # Net number of lines added by the hunks that lie entirely before lineno.
  cumulative_offset = 0

  # Find the hunk containing lineno (if any).
  for (oldstart, oldlength), (newstart, newlength) in hunks:
    if oldlength == 0:
      # A zero-length old range is a pure insertion, and its start names the
      # line *before* the insertion point. Shift it so that line |oldstart|
      # itself is treated as preceding the hunk rather than following it.
      oldstart += 1

    if lineno >= oldstart + oldlength:
      # Not there yet. Only hunks wholly before lineno move it, so this is the
      # only place the offset is accumulated.
      cumulative_offset += newlength - oldlength
      continue

    if lineno < oldstart:
      # Gone too far. This hunk (and all later ones) starts after lineno and
      # therefore must not contribute to the offset.
      break

    # lineno is in [oldstart, oldlength] at revision; [newstart, newlength] at
    # newrevision.

    # If newlength == 0, newstart will be the line before the deleted hunk.
    # Since the line must have been deleted, just return that as the nearest
    # line in the new file. Caution: newstart can be 0 in this case.
    if newlength == 0:
      return max(1, newstart)

    newend = newstart + newlength - 1

    # Move lineno based on the amount the entire hunk shifted.
    lineno = lineno + newstart - oldstart
    # Constrain the output within the range [newstart, newend].
    return min(newend, max(newstart, lineno))

  # Wasn't in a hunk. Figure out the line motion based on the difference in
  # length between the hunks that precede lineno.
  return lineno + cumulative_offset
257
258
def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout,
                err=sys.stderr):
  """Blames |filename| at |revision|, looking through ignored commits.

  Every line that git blame attributes to a commit in |ignored| is
  re-attributed to whichever commit is blamed for the corresponding line in
  that commit's parent, repeating until a non-ignored commit is found. Such
  lines are marked as modified in the printed output.

  Args:
    ignored: Collection of full commit hashes to skip over.
    filename: Path of the file to blame, relative to the current directory.
    revision: Revision at which to blame the file.
    out: File-like object that receives the blame table.
    err: File-like object that receives git's error output.

  Returns:
    0 on success, or the return code of the git command if the initial blame
    fails.
  """
  # Map from (commit hash, filename) to the parsed blame of that file at that
  # commit. The filename is part of the key because the blame of a commit
  # depends on which file is blamed.
  blame_from = {}

  def cache_blame_from(filename, commithash):
    key = (commithash, filename)
    try:
      return blame_from[key]
    except KeyError:
      parsed = get_parsed_blame(filename, commithash)
      blame_from[key] = parsed
      return parsed

  try:
    parsed = cache_blame_from(filename, git_common.hash_one(revision))
  except subprocess2.CalledProcessError as e:
    err.write(e.stderr)
    return e.returncode

  new_parsed = []

  # We don't show filenames in blame output unless we have to.
  show_filenames = False

  for line in parsed:
    # If a line references an ignored commit, blame that commit's parent
    # repeatedly until we find a non-ignored commit.
    while line.commit.commithash in ignored:
      if line.commit.previous is None:
        # You can't ignore the commit that added this file.
        break

      # The porcelain "previous" field is "<hash> <filename>".
      previouscommit, previousfilename = line.commit.previous.split(' ', 1)
      parent_blame = cache_blame_from(previousfilename, previouscommit)

      if len(parent_blame) == 0:
        # The previous version of this file was empty, therefore, you can't
        # ignore this commit.
        break

      # line.lineno_then is the line number in question at line.commit. We need
      # to translate that line number so that it refers to the position of the
      # same line on previouscommit.
      lineno_previous = approx_lineno_across_revs(
          line.commit.filename, previousfilename, line.commit.commithash,
          previouscommit, line.lineno_then)
      logging.debug('ignore commit %s on line p%d/t%d/n%d',
                    line.commit.commithash, lineno_previous, line.lineno_then,
                    line.lineno_now)

      # Get the line at lineno_previous in the parent commit.
      assert 1 <= lineno_previous <= len(parent_blame)
      newline = parent_blame[lineno_previous - 1]

      # Replace the commit and lineno_then, but not the lineno_now or context.
      # lineno_then must be the line's position at newline.commit (not
      # lineno_previous, which is its position at previouscommit), because the
      # next iteration of this loop interprets it relative to line.commit.
      logging.debug('    replacing with %r', newline)
      line = BlameLine(newline.commit, line.context, newline.lineno_then,
                       line.lineno_now, True)

    # If any line has a different filename to the file's current name, turn on
    # filename display for the entire blame output.
    if line.commit.filename != filename:
      show_filenames = True

    new_parsed.append(line)

  pretty_print(new_parsed, show_filenames=show_filenames, out=out)

  return 0
328
mgiuca@chromium.orgcd0a1cf2016-02-22 00:40:33 +0000329
def parse_ignore_file(ignore_file):
  """Yields the revisions listed in an ignore file.

  Everything from the first '#' on a line is a comment. Surrounding whitespace
  is stripped, and lines that are empty after that are skipped.

  Args:
    ignore_file: Iterable of lines, such as an open text file.
  """
  for raw in ignore_file:
    revision, _, _ = raw.partition('#')
    revision = revision.strip()
    if not revision:
      continue
    yield revision
335
336
def main(args, stdout=sys.stdout, stderr=sys.stderr):
  """Command-line entry point for git hyper-blame.

  Args:
    args: Command-line arguments, without the program name.
    stdout: File-like object that receives the blame output.
    stderr: File-like object that receives warnings and git error output.

  Returns:
    The process exit code: 0 on success, otherwise the return code of the
    failing git command.
  """
  parser = argparse.ArgumentParser(
      prog='git hyper-blame',
      description='git blame with support for ignoring certain commits.')
  parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
                      default=[], help='a revision to ignore')
  parser.add_argument('--ignore-file', metavar='FILE',
                      type=argparse.FileType('r'), dest='ignore_file',
                      help='a file containing a list of revisions to ignore')
  # This is a flag: without action='store_true', argparse would treat it as an
  # option taking a value and consume the next command-line argument.
  parser.add_argument('--no-default-ignores', dest='no_default_ignores',
                      action='store_true',
                      help='Do not ignore commits from .git-blame-ignore-revs.')
  parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
                      help='revision to look at')
  parser.add_argument('filename', metavar='FILE', help='filename to blame')

  args = parser.parse_args(args)
  try:
    repo_root = git_common.repo_root()
  except subprocess2.CalledProcessError as e:
    stderr.write(e.stderr)
    return e.returncode

  # Make filename relative to the repository root, and cd to the root dir (so
  # all filenames throughout this script are relative to the root).
  filename = os.path.relpath(args.filename, repo_root)
  os.chdir(repo_root)

  # Normalize filename so we can compare it to other filenames git gives us.
  filename = os.path.normpath(filename)
  filename = os.path.normcase(filename)

  # Collect revisions to ignore from -i, the default ignore file (looked up in
  # the repository root, which is now the current directory) and --ignore-file.
  ignored_list = list(args.ignored)
  if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
    with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
      ignored_list.extend(parse_ignore_file(ignore_file))

  if args.ignore_file:
    ignored_list.extend(parse_ignore_file(args.ignore_file))

  # Resolve every revision to a full hash; unknown revisions only produce a
  # warning.
  ignored = set()
  for c in ignored_list:
    try:
      ignored.add(git_common.hash_one(c))
    except subprocess2.CalledProcessError:
      # Custom warning string (the message from git-rev-parse is inappropriate).
      stderr.write('warning: unknown revision \'%s\'.\n' % c)

  return hyper_blame(ignored, filename, args.revision, out=stdout, err=stderr)
385
386
if __name__ == '__main__':  # pragma: no cover
  # Run main() with its output sent to the stream yielded by git_common.less()
  # (presumably a pager — confirm in git_common), and exit with main()'s
  # return value as the process status.
  with git_common.less() as less_input:
    sys.exit(main(sys.argv[1:], stdout=less_input))