#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrapper around git blame that ignores certain commits.
"""
8
9from __future__ import print_function
Edward Lemur12a537f2019-10-03 21:57:15 +000010from __future__ import unicode_literals
mgiuca@chromium.org81937562016-02-03 08:00:53 +000011
12import argparse
13import collections
14import logging
15import os
16import subprocess2
17import sys
18
19import git_common
20import git_dates
mgiuca@chromium.org63906ba2016-04-29 01:43:32 +000021import setup_color
mgiuca@chromium.org81937562016-02-03 08:00:53 +000022
23
# Configure the root logger at import time so that INFO-level (and more severe)
# messages are emitted; the logging.debug() calls in this file stay silent.
logging.getLogger().setLevel(logging.INFO)


# Name of the file listing revisions to ignore by default. main() opens it by
# this relative name after changing directory to the repository root.
DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
28
29
class Commit(object):
    """Metadata about a single commit.

    Only the hash is known at construction time. Every other attribute starts
    out as None and is filled in later (parse_blame() assigns them with
    setattr() as it reads the commit's header lines).
    """

    def __init__(self, commithash):
        self.commithash = commithash
        # All remaining header fields begin unset.
        for field in ('author', 'author_mail', 'author_time', 'author_tz',
                      'committer', 'committer_mail', 'committer_time',
                      'committer_tz', 'summary', 'boundary', 'previous',
                      'filename'):
            setattr(self, field, None)

    def __repr__(self):  # pragma: no cover
        return '<Commit %s>' % self.commithash
49
50
# One line of blame output.
#   commit:      the Commit the line is attributed to.
#   context:     the text of the line (without the leading tab git emits).
#   lineno_then: line number of the line at |commit|.
#   lineno_now:  line number of the line in the revision being blamed.
#   modified:    False as produced by parse_blame(); hyper_blame() sets it to
#                True on lines it re-attributed past an ignored commit.
BlameLine = collections.namedtuple(
    'BlameLine',
    ['commit', 'context', 'lineno_then', 'lineno_now', 'modified'])
54
55
def parse_blame(blameoutput):
    """Parses the output of git blame -p into a data structure.

    Yields one BlameLine per blamed line. Commit objects are shared: every
    line attributed to the same hash refers to the same Commit instance, and
    header fields seen for that hash are stored on it as attributes (with '-'
    in the field name replaced by '_').
    """
    remaining = iter(blameoutput.split('\n'))
    commits = {}

    for header in remaining:
        if not header.strip():
            continue

        # Header line: "<hash> <lineno_then> <lineno_now> [...]".
        fields = header.split()
        commithash = fields[0]
        lineno_then = int(fields[1])
        lineno_now = int(fields[2])

        commit = commits.get(commithash)
        if commit is None:
            commit = commits[commithash] = Commit(commithash)

        # Consume "key value" detail lines up to the tab-prefixed context line.
        # |last| ends up as the context line, or as the final line consumed if
        # the input ends before one is found.
        last = header
        for last in remaining:
            if last.startswith('\t'):
                break

            key, separator, value = last.partition(' ')
            if not separator:
                # A bare keyword (no value) is stored as a flag.
                value = True
            setattr(commit, key.replace('-', '_'), value)

        # Drop the leading tab.
        yield BlameLine(commit, last[1:], lineno_then, lineno_now, False)
96
97
def print_table(table, align=None, out=sys.stdout):
    """Print a 2D rectangular array, aligning columns with spaces.

    Each row is written as one line of UTF-8 encoded bytes, cells separated by
    a single space. Characters that cannot be encoded are replaced.

    Args:
      table: Sequence of rows, each a sequence of text cells. All rows are
          expected to have the same number of cells.
      align: Optional string of 'l' and 'r', designating whether each column is
          left- or right-aligned. Defaults to left aligned.
      out: Stream to write to. Bytes are written, so if |out| is a text stream
          exposing its binary stream as a `buffer` attribute (as sys.stdout
          does on Python 3), that binary stream is written to instead.
    """
    if not table:
        return

    # Writing bytes to a text stream raises TypeError on Python 3; go through
    # the underlying binary stream when there is one.
    sink = getattr(out, 'buffer', None)
    if sink is None:
        sink = out
    else:
        # Push out any text already queued so the output order is preserved.
        try:
            out.flush()
        except IOError:  # pragma: no cover
            pass

    colwidths = [max(len(cell) for cell in column) for column in zip(*table)]

    if align is None:  # pragma: no cover
        align = 'l' * len(colwidths)

    for row in table:
        last_index = len(row) - 1
        cells = []
        for i, cell in enumerate(row):
            padding = ' ' * (colwidths[i] - len(cell))
            if align[i] == 'r':
                cell = padding + cell
            elif i < last_index:
                # Do not pad the final column if left-aligned.
                cell += padding
            cells.append(cell.encode('utf-8', 'replace'))
        try:
            sink.write(b' '.join(cells) + b'\n')
        except IOError:  # pragma: no cover
            # Can happen on Windows if the pipe is closed early.
            pass
134
135
def pretty_print(parsedblame, show_filenames=False, out=sys.stdout):
    """Pretty-prints the output of parse_blame.

    Args:
      parsedblame: Iterable of BlameLine.
      show_filenames: If true, add a column with each commit's filename right
          after the abbreviated hash.
      out: Stream handed on to print_table.
    """
    if show_filenames:
        alignment = 'llllrl'
    else:
        alignment = 'lllrl'

    rows = []
    for blame_line in parsedblame:
        commit = blame_line.commit
        when = git_dates.timestamp_offset_to_datetime(
            commit.author_time, commit.author_tz)
        # A trailing '*' marks lines whose attribution was changed.
        marker = '*' if blame_line.modified else ''

        cells = [commit.commithash[:8]]
        if show_filenames:
            cells.append(commit.filename)
        cells.extend([
            '(' + commit.author,
            git_dates.datetime_string(when),
            str(blame_line.lineno_now) + marker + ')',
            blame_line.context,
        ])
        rows.append(cells)

    print_table(rows, align=alignment, out=out)
151
152
def get_parsed_blame(filename, revision='HEAD'):
    """Blames |filename| at |revision| and returns the result as a list.

    The porcelain blame output from git_common.blame is fed through
    parse_blame and fully materialised, so the result can be indexed.
    """
    porcelain_output = git_common.blame(
        filename, revision=revision, porcelain=True)
    return [blame_line for blame_line in parse_blame(porcelain_output)]
156
157
# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
# only the hunk line numbers, not the actual diff contents).
# A hunk list contains (old, new) pairs, where old and new are (start, length)
# pairs.
# NOTE(review): a hunk list is described as possibly being None (if the diff
# failed), but cache_diff_hunks() below only ever stores lists. Confirm that
# nothing else fills this cache before relying on None entries.
diff_hunks_cache = {}
163
164
def cache_diff_hunks(oldrev, newrev):
    """Returns the hunk line ranges of the diff between two revisions.

    Results are memoised in diff_hunks_cache, so git is invoked at most once
    per (oldrev, newrev) pair.

    Returns:
      A list of ((old_start, old_length), (new_start, new_length)) tuples, one
      per '@@' hunk header, in the order they appear in the diff.
    """
    def parse_range(token):
        # |token| looks like "-12,3" or "+7"; the leading sign is dropped and a
        # missing length means 1.
        start, comma, length = token[1:].partition(',')
        if not comma:
            length = 1
        return int(start), int(length)

    cache_key = (oldrev, newrev)
    if cache_key in diff_hunks_cache:
        return diff_hunks_cache[cache_key]

    # Use -U0 to get the smallest possible hunks.
    diff_text = git_common.diff(oldrev, newrev, '-U0')

    # A hunk header is "@@ <old range> <new range> @@ ..."; keep the two ranges.
    hunks = [
        tuple(parse_range(token) for token in header.split(' ', 3)[1:3])
        for header in diff_text.split('\n')
        if header.startswith('@@')
    ]

    diff_hunks_cache[cache_key] = hunks
    return hunks
196
197
def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
    """Computes the approximate movement of a line number between two revisions.

    Consider line |lineno| in |filename| at |revision|. This function computes
    the line number of that line in |newfilename| at |newrevision|. This is
    necessarily approximate.

    Args:
      filename: The file (within the repo) at |revision|.
      newfilename: The name of the same file at |newrevision|.
      revision: A git revision.
      newrevision: Another git revision. Note: Can be ahead or behind
          |revision|.
      lineno: Line number within |filename| at |revision|.

    Returns:
      Line number within |newfilename| at |newrevision|.
    """
    # This doesn't work that well if there are a lot of line changes within the
    # hunk (demonstrated by
    # GitHyperBlameLineMotionTest.testIntraHunkLineMotion). A fuzzy heuristic
    # that takes the text of the new line and tries to find a deleted line
    # within the hunk that mostly matches the new line could help.

    # Use the <revision>:<filename> syntax to diff between two blobs. This is
    # the only way to diff a file that has been renamed.
    old = '%s:%s' % (revision, filename)
    new = '%s:%s' % (newrevision, newfilename)
    hunks = cache_diff_hunks(old, new)

    # Net number of lines added by the hunks that lie entirely before lineno.
    cumulative_offset = 0

    # Find the hunk containing lineno (if any).
    for (oldstart, oldlength), (newstart, newlength) in hunks:
        if oldlength == 0:
            # Pure insertion: a zero-length range names the line *before* the
            # change, so the insertion only affects lines after oldstart.
            oldstart += 1

        if lineno < oldstart:
            # Gone too far. This hunk (and all later ones) comes after lineno,
            # so it must not contribute to the offset.
            break

        if lineno >= oldstart + oldlength:
            # Not there yet. The whole hunk precedes lineno, so lineno shifts
            # by the hunk's change in length.
            cumulative_offset += newlength - oldlength
            continue

        # lineno is in [oldstart, oldstart + oldlength) at revision, which
        # corresponds to [newstart, newstart + newlength) at newrevision.

        # If newlength == 0, newstart will be the line before the deleted hunk.
        # Since the line must have been deleted, just return that as the
        # nearest line in the new file. Caution: newstart can be 0 in this
        # case.
        if newlength == 0:
            return max(1, newstart)

        newend = newstart + newlength - 1

        # Move lineno based on the amount the entire hunk shifted.
        lineno = lineno + newstart - oldstart
        # Constrain the output within the range [newstart, newend].
        return min(newend, max(newstart, lineno))

    # Wasn't in a hunk. Figure out the line motion based on the difference in
    # length of the hunks that precede lineno.
    return lineno + cumulative_offset
260
261
def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout,
                err=sys.stderr):
    """Blames |filename| at |revision|, looking through |ignored| commits.

    Each line that blame attributes to an ignored commit is re-attributed to
    whatever the commit's parent blames for the (approximately) corresponding
    line, repeatedly, until a non-ignored commit is reached. Re-attributed
    lines are marked as modified in the printed table.

    Args:
      ignored: Container of commit hashes, in the form they appear in blame
          output, that should never be blamed.
      filename: The file to blame.
      revision: The revision at which to blame the file.
      out: Stream the resulting table is written to (via pretty_print).
      err: Text stream that receives git's error output if the initial blame
          fails.

    Returns:
      0 on success, or the return code of the failed git command if the
      initial blame could not be computed.
    """
    # Map from (filename, commit) to parsed blame of that file at that commit.
    # The filename is part of the key because a file can have a different name
    # at different commits, and a blame is only valid for the name it was run
    # with.
    blame_from = {}

    def cache_blame_from(filename, commithash):
        key = (filename, commithash)
        try:
            return blame_from[key]
        except KeyError:
            parsed = get_parsed_blame(filename, commithash)
            blame_from[key] = parsed
            return parsed

    try:
        parsed = cache_blame_from(filename, git_common.hash_one(revision))
    except subprocess2.CalledProcessError as e:
        err.write(e.stderr.decode())
        return e.returncode

    new_parsed = []

    # We don't show filenames in blame output unless we have to.
    show_filenames = False

    for line in parsed:
        # If a line references an ignored commit, blame that commit's parent
        # repeatedly until we find a non-ignored commit.
        while line.commit.commithash in ignored:
            if line.commit.previous is None:
                # You can't ignore the commit that added this file.
                break

            previouscommit, previousfilename = line.commit.previous.split(
                ' ', 1)
            parent_blame = cache_blame_from(previousfilename, previouscommit)

            if not parent_blame:
                # The previous version of this file was empty, therefore, you
                # can't ignore this commit.
                break

            # line.lineno_then is the line number in question at line.commit.
            # We need to translate that line number so that it refers to the
            # position of the same line on previouscommit.
            lineno_previous = approx_lineno_across_revs(
                line.commit.filename, previousfilename,
                line.commit.commithash, previouscommit, line.lineno_then)
            logging.debug('ignore commit %s on line p%d/t%d/n%d',
                          line.commit.commithash, lineno_previous,
                          line.lineno_then, line.lineno_now)

            # The translation is only approximate, so keep the result inside
            # the parent's line range. (An assert would vanish under -O, where
            # a result of 0 would silently select the last line.)
            lineno_previous = min(len(parent_blame), max(1, lineno_previous))

            # Get the line at lineno_previous in the parent commit.
            newline = parent_blame[lineno_previous - 1]

            # Replace the commit and lineno_then, but not the lineno_now or
            # context.
            line = BlameLine(newline.commit, line.context,
                             newline.lineno_then, line.lineno_now, True)
            logging.debug('    replacing with %r', line)

        # If any line has a different filename to the file's current name,
        # turn on filename display for the entire blame output.
        if line.commit.filename != filename:
            show_filenames = True

        new_parsed.append(line)

    pretty_print(new_parsed, show_filenames=show_filenames, out=out)

    return 0
331
mgiuca@chromium.orgcd0a1cf2016-02-22 00:40:33 +0000332
def parse_ignore_file(ignore_file):
    """Yields the revisions listed in |ignore_file|.

    |ignore_file| is any iterable of text lines. On each line, everything from
    the first '#' onwards is a comment and is discarded, as is surrounding
    whitespace. Lines that end up empty are skipped.
    """
    for raw_line in ignore_file:
        revision = raw_line.partition('#')[0].strip()
        if not revision:
            continue
        yield revision
338
339
def main(args, stdout=sys.stdout, stderr=sys.stderr):
    """Command-line entry point for git hyper-blame.

    Args:
      args: Command-line arguments, without the program name.
      stdout: Stream the blame table is written to.
      stderr: Text stream for warnings and git error output.

    Returns:
      The process exit code: whatever hyper_blame returns, or the return code
      of the git command that failed while locating the repository root.
    """
    parser = argparse.ArgumentParser(
        prog='git hyper-blame',
        description='git blame with support for ignoring certain commits.')
    parser.add_argument('-i', metavar='REVISION', action='append',
                        dest='ignored', default=[],
                        help='a revision to ignore')
    parser.add_argument('--ignore-file', metavar='FILE',
                        type=argparse.FileType('r'), dest='ignore_file',
                        help='a file containing a list of revisions to ignore')
    parser.add_argument(
        '--no-default-ignores', dest='no_default_ignores',
        action='store_true',
        help='Do not ignore commits from .git-blame-ignore-revs.')
    parser.add_argument('revision', nargs='?', default='HEAD',
                        metavar='REVISION', help='revision to look at')
    parser.add_argument('filename', metavar='FILE', help='filename to blame')

    args = parser.parse_args(args)

    # argparse has already opened --ignore-file. Read it straight away so the
    # handle is closed on every path, including the early return below.
    file_ignores = []
    if args.ignore_file is not None:
        try:
            file_ignores = list(parse_ignore_file(args.ignore_file))
        finally:
            # FileType maps '-' to sys.stdin, which is not ours to close.
            if args.ignore_file is not sys.stdin:
                args.ignore_file.close()

    try:
        repo_root = git_common.repo_root()
    except subprocess2.CalledProcessError as e:
        stderr.write(e.stderr.decode())
        return e.returncode

    # Make filename relative to the repository root, and cd to the root dir (so
    # all filenames throughout this script are relative to the root).
    filename = os.path.relpath(args.filename, repo_root)
    os.chdir(repo_root)

    # Normalize filename so we can compare it to other filenames git gives us.
    filename = os.path.normpath(filename)
    filename = os.path.normcase(filename)

    # Order: -i arguments, then the default ignore file, then --ignore-file.
    ignored_list = list(args.ignored)
    if (not args.no_default_ignores
            and os.path.exists(DEFAULT_IGNORE_FILE_NAME)):
        with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
            ignored_list.extend(parse_ignore_file(ignore_file))

    ignored_list.extend(file_ignores)

    ignored = set()
    for c in ignored_list:
        try:
            ignored.add(git_common.hash_one(c))
        except subprocess2.CalledProcessError:
            # Custom warning string (the message from git-rev-parse is
            # inappropriate).
            stderr.write('warning: unknown revision \'%s\'.\n' % c)

    return hyper_blame(ignored, filename, args.revision, out=stdout,
                       err=stderr)
389
390
if __name__ == '__main__':  # pragma: no cover
    setup_color.init()
    # main() writes its table to whatever git_common.less() provides
    # (presumably the input side of a pager -- confirm in git_common).
    with git_common.less() as pager_input:
        exit_code = main(sys.argv[1:], stdout=pager_input)
        sys.exit(exit_code)