#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrapper around git blame that ignores certain commits.
"""
8
9from __future__ import print_function
10
11import argparse
12import collections
13import logging
14import os
15import subprocess2
16import sys
17
18import git_common
19import git_dates
mgiuca@chromium.org63906ba2016-04-29 01:43:32 +000020import setup_color
mgiuca@chromium.org81937562016-02-03 08:00:53 +000021
22
# Let INFO-level (and more severe) records through the root logger.
logging.getLogger().setLevel(level=logging.INFO)


# File name, relative to the repository root, of the list of revisions that
# main() ignores unless --no-default-ignores is given.
DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
27
28
class Commit(object):
  """Metadata about a single commit, as reported by git blame.

  Only |commithash| is known at construction time. Every other attribute
  starts out as None and is expected to be filled in later by whoever parses
  the blame output.
  """

  def __init__(self, commithash):
    self.commithash = commithash
    # All remaining fields are placeholders until blame details are parsed.
    for field in ('author', 'author_mail', 'author_time', 'author_tz',
                  'committer', 'committer_mail', 'committer_time',
                  'committer_tz', 'summary', 'boundary', 'previous',
                  'filename'):
      setattr(self, field, None)

  def __repr__(self):  # pragma: no cover
    return '<Commit %s>' % self.commithash
48
49
# One line of blame output:
#   commit:      the commit the line is attributed to.
#   context:     the text of the line.
#   lineno_then: the line's number in the file at |commit|.
#   lineno_now:  the line's number in the file being blamed.
#   modified:    whether the attribution was changed from what git reported.
BlameLine = collections.namedtuple(
    typename='BlameLine',
    field_names='commit context lineno_then lineno_now modified')
53
54
def parse_blame(blameoutput):
  """Parses the output of git blame -p into a data structure.

  Args:
    blameoutput: The complete blame output as one newline-separated string.

  Yields:
    One BlameLine per blamed line, always with modified=False. Lines
    attributed to the same commit hash share a single Commit object.
  """
  lines = blameoutput.split('\n')
  total = len(lines)
  known_commits = {}
  pos = 0

  while pos < total:
    # Each entry opens with "<hash> <lineno_then> <lineno_now> ...".
    header = lines[pos]
    pos += 1
    if not header.strip():
      continue
    fields = header.split()
    commithash = fields[0]
    lineno_then = int(fields[1])
    lineno_now = int(fields[2])

    commit = known_commits.get(commithash)
    if commit is None:
      commit = Commit(commithash)
      known_commits[commithash] = commit

    # Consume "key value" detail lines until the tab-prefixed context line.
    line = header
    while pos < total:
      line = lines[pos]
      pos += 1
      if line.startswith('\t'):
        break

      key, separator, value = line.partition(' ')
      if not separator:
        # A bare key with no value is recorded as a flag.
        value = True
      setattr(commit, key.replace('-', '_'), value)

    # Drop the first character (the tab marking the context line).
    context = line[1:]

    yield BlameLine(commit, context, lineno_then, lineno_now, False)
95
96
def num_codepoints(s):
  """Gets the length of a string, in Unicode codepoints.

  Args:
    s: Either a UTF-8 encoded byte string or an already-decoded text string.

  Returns:
    The number of codepoints in |s|. Malformed UTF-8 in a byte string is
    substituted (errors='replace') instead of raising.
  """
  # Only byte strings need decoding; text strings have no decode() method on
  # Python 3 and are already measured in codepoints.
  if isinstance(s, bytes):
    s = s.decode('utf-8', errors='replace')
  return len(s)
100
101
def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout):
  """Print a 2D rectangular array, aligning columns with spaces.

  Args:
    table: Sequence of rows, each a sequence of cell strings.
    colsep: String written between the cells of a row.
    rowsep: String written after each row.
    align: Optional string of 'l' and 'r', designating whether each column is
        left- or right-aligned. Defaults to left aligned.
    out: File-like object the table is written to.
  """
  if not len(table):
    return

  # Widest cell seen so far in each column.
  widths = None
  for row in table:
    lengths = [num_codepoints(cell) for cell in row]
    if widths is None:
      widths = lengths
    else:
      widths = [max(widths[col], length) for col, length in enumerate(lengths)]

  if align is None:  # pragma: no cover
    align = 'l' * len(widths)

  for row in table:
    last_col = len(row) - 1
    cells = []
    for col, cell in enumerate(row):
      padding = ' ' * (widths[col] - num_codepoints(cell))
      if align[col] == 'r':
        cells.append(padding + cell)
      elif col < last_col:
        cells.append(cell + padding)
      else:
        # Do not pad the final column if left-aligned.
        cells.append(cell)
    try:
      print(*cells, sep=colsep, end=rowsep, file=out)
    except IOError:  # pragma: no cover
      # Can happen on Windows if the pipe is closed early.
      pass
138
139
def pretty_print(parsedblame, show_filenames=False, out=sys.stdout):
  """Pretty-prints the output of parse_blame.

  Args:
    parsedblame: Iterable of BlameLine entries.
    show_filenames: If true, adds a column holding each commit's filename
        right after the abbreviated hash.
    out: File-like object the table is written to.
  """
  rows = []
  for entry in parsedblame:
    commit = entry.commit
    when = git_dates.timestamp_offset_to_datetime(
        commit.author_time, commit.author_tz)
    short_hash = commit.commithash[:8]
    author_cell = '(' + commit.author
    date_cell = git_dates.datetime_string(when)
    # A trailing '*' marks lines whose attribution was modified.
    marker = '*' if entry.modified else ''
    lineno_cell = str(entry.lineno_now) + marker + ')'

    cells = [short_hash, author_cell, date_cell, lineno_cell, entry.context]
    if show_filenames:
      cells[1:1] = [commit.filename]
    rows.append(cells)

  # The line-number column is the only right-aligned one.
  alignment = 'llllrl' if show_filenames else 'lllrl'
  print_table(rows, align=alignment, out=out)
155
156
def get_parsed_blame(filename, revision='HEAD'):
  """Blames |filename| at |revision| and returns the parsed result as a list.

  Args:
    filename: The file to blame.
    revision: The revision to blame at.

  Returns:
    A list of the BlameLine entries produced by parse_blame.
  """
  porcelain_output = git_common.blame(
      filename, revision=revision, porcelain=True)
  parsed_lines = parse_blame(porcelain_output)
  return list(parsed_lines)
160
161
# Cache of git diff results, keyed by (oldrev, newrev). Only the hunk line
# numbers are kept, not the diff contents: each value is a list of (old, new)
# pairs, where old and new are themselves (start, length) pairs.
# NOTE(review): an earlier version of this comment said a value may be None if
# the diff failed; confirm whether any caller still relies on that.
diff_hunks_cache = dict()
167
168
def cache_diff_hunks(oldrev, newrev):
  """Returns the hunk ranges of the diff from |oldrev| to |newrev|, memoized.

  Args:
    oldrev: Old side of the diff, as accepted by git_common.diff.
    newrev: New side of the diff, as accepted by git_common.diff.

  Returns:
    A list with one entry per '@@' hunk header, each a tuple of
    (start, length) pairs parsed from the header's old and new ranges. The
    list is stored in diff_hunks_cache and reused on later calls.
  """
  def parse_start_length(s):
    # Chop the '-' or '+'.
    s = s[1:]
    # Length is optional (defaults to 1).
    pieces = s.split(',')
    if len(pieces) == 2:
      start, length = pieces
    else:
      start, length = s, 1
    return int(start), int(length)

  cache_key = (oldrev, newrev)
  if cache_key in diff_hunks_cache:
    return diff_hunks_cache[cache_key]

  # Use -U0 to get the smallest possible hunks.
  diff = git_common.diff(oldrev, newrev, '-U0')

  # Keep only the hunk headers; fields 1 and 2 are the old and new ranges.
  hunks = [
      tuple(parse_start_length(r) for r in header.split(' ', 3)[1:3])
      for header in diff.split('\n')
      if header.startswith('@@')
  ]

  diff_hunks_cache[cache_key] = hunks
  return hunks
200
201
def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
  """Computes the approximate movement of a line number between two revisions.

  Consider line |lineno| in |filename| at |revision|. This function computes the
  line number of that line in |newfilename| at |newrevision|. This is
  necessarily approximate.

  Args:
    filename: The file (within the repo) at |revision|.
    newfilename: The name of the same file at |newrevision|.
    revision: A git revision.
    newrevision: Another git revision. Note: Can be ahead or behind |revision|.
    lineno: Line number within |filename| at |revision|.

  Returns:
    Line number within |newfilename| at |newrevision|.
  """
  # This doesn't work that well if there are a lot of line changes within the
  # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  # A fuzzy heuristic that takes the text of the new line and tries to find a
  # deleted line within the hunk that mostly matches the new line could help.

  # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  # only way to diff a file that has been renamed.
  old = '%s:%s' % (revision, filename)
  new = '%s:%s' % (newrevision, newfilename)
  hunks = cache_diff_hunks(old, new)

  # Net number of lines added by the hunks that lie entirely before lineno.
  cumulative_offset = 0

  # Find the hunk containing lineno (if any).
  for (oldstart, oldlength), (newstart, newlength) in hunks:
    # A zero-length old range (pure insertion) names the line *before* the
    # insertion point, so the first old line it pushes down is oldstart + 1.
    first_affected = oldstart if oldlength else oldstart + 1

    if lineno < first_affected:
      # Gone too far. This hunk starts after lineno, so it must not contribute
      # to the offset.
      break

    if lineno >= first_affected + oldlength:
      # Not there yet. The hunk is wholly before lineno, so it shifts it.
      cumulative_offset += newlength - oldlength
      continue

    # lineno is in [oldstart, oldlength] at revision; [newstart, newlength] at
    # newrevision.

    # If newlength == 0, newstart will be the line before the deleted hunk.
    # Since the line must have been deleted, just return that as the nearest
    # line in the new file. Caution: newstart can be 0 in this case.
    if newlength == 0:
      return max(1, newstart)

    newend = newstart + newlength - 1

    # Move lineno based on the amount the entire hunk shifted.
    lineno = lineno + newstart - oldstart
    # Constrain the output within the range [newstart, newend].
    return min(newend, max(newstart, lineno))

  # Wasn't in a hunk. Figure out the line motion based on the difference in
  # length between the hunks that precede lineno.
  return lineno + cumulative_offset
264
265
def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout,
                err=sys.stderr):
  """Blames |filename| at |revision|, looking through the commits in |ignored|.

  A line attributed to an ignored commit is re-attributed to whatever the
  ignored commit's previous revision blames for the (approximately) matching
  line, repeatedly, until a non-ignored commit is reached or there is no
  earlier version to consult.

  Args:
    ignored: Collection of commit hashes to skip; compared against the hashes
        reported by git blame.
    filename: The file to blame.
    revision: The revision to start blaming from.
    out: File-like object the formatted blame is written to.
    err: File-like object that receives git's error output if the initial
        blame fails.

  Returns:
    0 on success, or the return code of the failed git command.
  """
  # Map from (commit, filename) to parsed blame of that file at that commit.
  # The filename is part of the key because the blame depends on both.
  blame_from = {}

  def cache_blame_from(filename, commithash):
    key = (commithash, filename)
    try:
      return blame_from[key]
    except KeyError:
      parsed = get_parsed_blame(filename, commithash)
      blame_from[key] = parsed
      return parsed

  try:
    parsed = cache_blame_from(filename, git_common.hash_one(revision))
  except subprocess2.CalledProcessError as e:
    err.write(e.stderr)
    return e.returncode

  new_parsed = []

  # We don't show filenames in blame output unless we have to.
  show_filenames = False

  for line in parsed:
    # If a line references an ignored commit, blame that commit's parent
    # repeatedly until we find a non-ignored commit.
    while line.commit.commithash in ignored:
      if line.commit.previous is None:
        # You can't ignore the commit that added this file.
        break

      previouscommit, previousfilename = line.commit.previous.split(' ', 1)
      parent_blame = cache_blame_from(previousfilename, previouscommit)

      if len(parent_blame) == 0:
        # The previous version of this file was empty, therefore, you can't
        # ignore this commit.
        break

      # line.lineno_then is the line number in question at line.commit. We need
      # to translate that line number so that it refers to the position of the
      # same line on previouscommit.
      lineno_previous = approx_lineno_across_revs(
          line.commit.filename, previousfilename, line.commit.commithash,
          previouscommit, line.lineno_then)
      logging.debug('ignore commit %s on line p%d/t%d/n%d',
                    line.commit.commithash, lineno_previous, line.lineno_then,
                    line.lineno_now)

      # Get the line at lineno_previous in the parent commit.
      assert 1 <= lineno_previous <= len(parent_blame)
      newline = parent_blame[lineno_previous - 1]

      # Replace the commit and lineno_then, but not the lineno_now or context.
      line = BlameLine(newline.commit, line.context, newline.lineno_then,
                       line.lineno_now, True)
      logging.debug('    replacing with %r', line)

    # If any line has a different filename to the file's current name, turn on
    # filename display for the entire blame output.
    if line.commit.filename != filename:
      show_filenames = True

    new_parsed.append(line)

  pretty_print(new_parsed, show_filenames=show_filenames, out=out)

  return 0
335
mgiuca@chromium.orgcd0a1cf2016-02-22 00:40:33 +0000336
def parse_ignore_file(ignore_file):
  """Yields the entries listed in an ignore file.

  Everything from the first '#' on a line is treated as a comment, and
  surrounding whitespace is stripped. Lines left empty are skipped.

  Args:
    ignore_file: Iterable of text lines (e.g. an open file).
  """
  for raw_line in ignore_file:
    entry = raw_line.partition('#')[0].strip()
    if not entry:
      continue
    yield entry
342
343
def main(args, stdout=sys.stdout, stderr=sys.stderr):
  """Command-line entry point for git hyper-blame.

  Collects the revisions to ignore from -i flags, from the default ignore file
  at the repository root (unless --no-default-ignores is given) and from
  --ignore-file, then runs hyper_blame on the requested file.

  Args:
    args: Command-line arguments, without the program name.
    stdout: File-like object the blame output is written to.
    stderr: File-like object warnings and git errors are written to.

  Returns:
    The process exit code: the result of hyper_blame, or the return code of
    git if the repository root cannot be determined.
  """
  parser = argparse.ArgumentParser(
      prog='git hyper-blame',
      description='git blame with support for ignoring certain commits.')
  parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
                      default=[], help='a revision to ignore')
  parser.add_argument('--ignore-file', metavar='FILE',
                      type=argparse.FileType('r'), dest='ignore_file',
                      help='a file containing a list of revisions to ignore')
  parser.add_argument('--no-default-ignores', dest='no_default_ignores',
                      action='store_true',
                      help='Do not ignore commits from .git-blame-ignore-revs.')
  parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
                      help='revision to look at')
  parser.add_argument('filename', metavar='FILE', help='filename to blame')

  args = parser.parse_args(args)
  try:
    repo_root = git_common.repo_root()
  except subprocess2.CalledProcessError as e:
    stderr.write(e.stderr)
    return e.returncode

  # Make filename relative to the repository root, and cd to the root dir (so
  # all filenames throughout this script are relative to the root).
  filename = os.path.relpath(args.filename, repo_root)
  os.chdir(repo_root)

  # Normalize filename so we can compare it to other filenames git gives us.
  filename = os.path.normpath(filename)
  filename = os.path.normcase(filename)

  ignored_list = list(args.ignored)
  if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
    with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
      ignored_list.extend(parse_ignore_file(ignore_file))

  if args.ignore_file:
    try:
      ignored_list.extend(parse_ignore_file(args.ignore_file))
    finally:
      # argparse.FileType opened this handle for us, so release it once read.
      # '-' maps to sys.stdin, which is not ours to close.
      if args.ignore_file is not sys.stdin:
        args.ignore_file.close()

  ignored = set()
  for c in ignored_list:
    try:
      ignored.add(git_common.hash_one(c))
    except subprocess2.CalledProcessError:
      # Custom warning string (the message from git-rev-parse is inappropriate).
      stderr.write('warning: unknown revision \'%s\'.\n' % c)

  return hyper_blame(ignored, filename, args.revision, out=stdout, err=stderr)
393
394
# Script entry point: page the blame output through git_common.less().
if __name__ == '__main__':  # pragma: no cover
  setup_color.init()
  with git_common.less() as less_input:
    exit_code = main(sys.argv[1:], stdout=less_input)
    # Exit from inside the with-block, as before, so the pager context still
    # sees the SystemExit on its way out.
    sys.exit(exit_code)