blob: 560a3bf34b509508fec045d9199ddc77914c6a99 [file] [log] [blame]
Josip Sokcevic4de5dea2022-03-23 21:15:14 +00001#!/usr/bin/env python3
mgiuca@chromium.org81937562016-02-03 08:00:53 +00002# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Wrapper around git blame that ignores certain commits.
7"""
8
mgiuca@chromium.org81937562016-02-03 08:00:53 +00009import argparse
10import collections
11import logging
12import os
13import subprocess2
14import sys
15
16import git_common
17import git_dates
mgiuca@chromium.org63906ba2016-04-29 01:43:32 +000018import setup_color
mgiuca@chromium.org81937562016-02-03 08:00:53 +000019
20
# Set the root logger threshold to INFO at import time. The logging.debug()
# calls in this file are therefore not emitted unless the level is lowered.
logging.getLogger().setLevel(logging.INFO)


# File that main() reads extra ignored revisions from unless
# --no-default-ignores is given. main() opens it after changing directory to
# the repository root, so the name is relative to that root.
DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
25
26
class Commit:
  """Metadata about one commit, as collected from blame output.

  Only the hash is known at construction time. Every other field starts as
  None and is filled in by parse_blame, which sets one attribute per header
  line it reads (with '-' in the header key replaced by '_').
  """

  # Fields that are initialised to None, in the order they are created.
  _UNSET_FIELDS = (
      'author',
      'author_mail',
      'author_time',
      'author_tz',
      'committer',
      'committer_mail',
      'committer_time',
      'committer_tz',
      'summary',
      'boundary',
      'previous',
      'filename',
  )

  def __init__(self, commithash):
    self.commithash = commithash
    for field in self._UNSET_FIELDS:
      setattr(self, field, None)

  def __repr__(self):  # pragma: no cover
    return '<Commit {}>'.format(self.commithash)
46
47
# One line of blame output:
#   commit:      the Commit the line is attributed to.
#   context:     the text of the line.
#   lineno_then: the line's number in the file at |commit|.
#   lineno_now:  the line's number in the file at the blamed revision.
#   modified:    True once hyper_blame has re-attributed the line past an
#                ignored commit; parse_blame always sets it to False.
BlameLine = collections.namedtuple(
    'BlameLine',
    ['commit', 'context', 'lineno_then', 'lineno_now', 'modified'])
51
52
def parse_blame(blameoutput):
  """Parses the output of git blame -p into a data structure.

  Args:
    blameoutput: The blame output as a single string.

  Yields:
    One BlameLine per blamed line, with modified=False. Lines that share a
    commit hash share a single Commit object.
  """
  remaining = iter(blameoutput.split('\n'))
  commits = {}

  for header in remaining:
    if not header.strip():
      continue

    # Group header: '<hash> <lineno_then> <lineno_now> [...]'.
    fields = header.split()
    commithash = fields[0]
    lineno_then = int(fields[1])
    lineno_now = int(fields[2])

    commit = commits.get(commithash)
    if commit is None:
      commit = commits[commithash] = Commit(commithash)

    # Consume 'key value' detail lines up to the tab-prefixed context line.
    # If the input runs out first, |last| is simply the last line that was
    # read.
    last = header
    for last in remaining:
      if last.startswith('\t'):
        break

      key, separator, value = last.partition(' ')
      if not separator:
        # A bare key (no value) is recorded as a flag.
        value = True
      setattr(commit, key.replace('-', '_'), value)

    # Drop the leading tab.
    context = last[1:]

    yield BlameLine(commit, context, lineno_then, lineno_now, False)
93
94
def print_table(outbuf, table, align):
  """Writes a 2D rectangular array to outbuf, padding columns with spaces.

  Each row is written as one line: cells are joined by a single space,
  encoded as UTF-8 and terminated by a newline.

  Args:
    outbuf: Object whose write() accepts bytes.
    table: List of rows, each a list of strings of the same length.
    align: string of 'l' and 'r', designating whether each column is left- or
        right-aligned.
  """
  if not table:
    return

  # Width of each column is the length of its longest cell.
  widths = [len(cell) for cell in table[0]]
  for row in table[1:]:
    widths = [max(widths[col], len(cell)) for col, cell in enumerate(row)]

  for row in table:
    final_col = len(row) - 1
    encoded = []
    for col, text in enumerate(row):
      if align[col] == 'r':
        text = text.rjust(widths[col])
      elif col != final_col:
        # A left-aligned final column is left unpadded.
        text = text.ljust(widths[col])
      encoded.append(text.encode('utf-8', 'replace'))
    try:
      outbuf.write(b' '.join(encoded) + b'\n')
    except IOError:  # pragma: no cover
      # Can happen on Windows if the pipe is closed early.
      pass
127
128
def pretty_print(outbuf, parsedblame, show_filenames=False):
  """Pretty-prints the output of parse_blame.

  Each line becomes a row of: abbreviated hash, optional filename, author,
  author date, current line number (followed by '*' when the line was
  re-attributed) and the line's text.

  Args:
    outbuf: Output object handed on to print_table.
    parsedblame: Iterable of BlameLine.
    show_filenames: Whether to add each commit's filename as a second column.
  """
  rows = []
  for entry in parsedblame:
    commit = entry.commit
    authored = git_dates.timestamp_offset_to_datetime(
        commit.author_time, commit.author_tz)
    marker = '*' if entry.modified else ''

    cells = [commit.commithash[:8]]
    if show_filenames:
      cells.append(commit.filename)
    cells.extend([
        '(' + commit.author,
        git_dates.datetime_string(authored),
        str(entry.lineno_now) + marker + ')',
        entry.context,
    ])
    rows.append(cells)

  # Only the line-number column is right-aligned.
  if show_filenames:
    alignment = 'llllrl'
  else:
    alignment = 'lllrl'
  print_table(outbuf, rows, align=alignment)
mgiuca@chromium.org81937562016-02-03 08:00:53 +0000144
145
def get_parsed_blame(filename, revision='HEAD'):
  """Blames |filename| at |revision| and returns the parsed result.

  Args:
    filename: File to blame, passed through to git_common.blame.
    revision: Revision to blame at.

  Returns:
    A list of BlameLine, in the order parse_blame yields them.
  """
  return list(
      parse_blame(
          git_common.blame(filename, revision=revision, porcelain=True)))
149
150
# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
# only the hunk line numbers, not the actual diff contents).
# hunk list contains (old, new) pairs, where old and new are (start, length)
# pairs.
# NOTE(review): this comment used to say a hunk list can also be None (if the
# diff failed). Nothing in this file stores None here, and the consumer
# iterates the list unconditionally -- confirm whether that case still exists.
diff_hunks_cache = {}
156
157
def cache_diff_hunks(oldrev, newrev):
  """Returns the hunk ranges of the diff from |oldrev| to |newrev|.

  Results are memoized in diff_hunks_cache, keyed by (oldrev, newrev).

  Args:
    oldrev: Old side of the diff, passed through to git_common.diff.
    newrev: New side of the diff, passed through to git_common.diff.

  Returns:
    A list of ((oldstart, oldlength), (newstart, newlength)) tuples of ints,
    one per '@@' hunk header in the diff.
  """
  cache_key = (oldrev, newrev)
  if cache_key in diff_hunks_cache:
    return diff_hunks_cache[cache_key]

  def parse_range(token):
    # |token| is one side of a hunk header, e.g. '-12,3' or '+7'. Drop the
    # sign; a missing length means 1.
    body = token[1:]
    start, comma, length = body.partition(',')
    if not comma:
      length = 1
    elif ',' in length:
      # More than one comma: treat the whole token as the start, which then
      # fails the int() conversion below.
      start, length = body, 1
    return int(start), int(length)

  # Use -U0 to get the smallest possible hunks.
  diff_text = git_common.diff(oldrev, newrev, '-U0')

  # A header looks like '@@ -a,b +c,d @@ ...'; fields 1 and 2 are the ranges.
  hunks = [
      tuple(parse_range(token) for token in header.split(' ', 3)[1:3])
      for header in diff_text.split('\n')
      if header.startswith('@@')
  ]

  diff_hunks_cache[cache_key] = hunks
  return hunks
189
190
def _translate_lineno_through_hunks(hunks, lineno):
  """Maps a line number on the old side of |hunks| to the new side.

  Args:
    hunks: Iterable of ((oldstart, oldlength), (newstart, newlength)) pairs in
        ascending line order, as produced by a -U0 diff.
    lineno: 1-based line number on the old side.

  Returns:
    The approximate 1-based line number on the new side.
  """
  # Net number of lines added by the hunks that lie entirely before lineno.
  # A hunk's size change is only counted once it is known to precede lineno;
  # a hunk at or after lineno cannot move it.
  cumulative_offset = 0

  for (oldstart, oldlength), (newstart, newlength) in hunks:
    if oldlength == 0:
      # Pure insertion. In unified format an empty range is reported at the
      # line *before* it, so the new text goes after oldstart: only lines
      # strictly after oldstart are pushed down.
      if lineno <= oldstart:
        # Gone too far.
        break
      cumulative_offset += newlength
      continue

    if lineno < oldstart:
      # Gone too far.
      break

    if lineno >= oldstart + oldlength:
      # Not there yet; this hunk is wholly before lineno.
      cumulative_offset += newlength - oldlength
      continue

    # lineno is in [oldstart, oldstart + oldlength) on the old side, which
    # corresponds to [newstart, newstart + newlength) on the new side.

    # If newlength == 0, newstart will be the line before the deleted hunk.
    # Since the line must have been deleted, just return that as the nearest
    # line in the new file. Caution: newstart can be 0 in this case.
    if newlength == 0:
      return max(1, newstart)

    newend = newstart + newlength - 1

    # Move lineno based on the amount the entire hunk shifted, then constrain
    # the output within the range [newstart, newend].
    shifted = lineno + newstart - oldstart
    return min(newend, max(newstart, shifted))

  # Wasn't in a hunk. Figure out the line motion based on the difference in
  # length of the hunks that precede lineno.
  return lineno + cumulative_offset


def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
  """Computes the approximate movement of a line number between two revisions.

  Consider line |lineno| in |filename| at |revision|. This function computes the
  line number of that line in |newfilename| at |newrevision|. This is
  necessarily approximate.

  Args:
    filename: The file (within the repo) at |revision|.
    newfilename: The name of the same file at |newrevision|.
    revision: A git revision.
    newrevision: Another git revision. Note: Can be ahead or behind |revision|.
    lineno: Line number within |filename| at |revision|.

  Returns:
    Line number within |newfilename| at |newrevision|.
  """
  # This doesn't work that well if there are a lot of line changes within the
  # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  # A fuzzy heuristic that takes the text of the new line and tries to find a
  # deleted line within the hunk that mostly matches the new line could help.

  # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  # only way to diff a file that has been renamed.
  old = '%s:%s' % (revision, filename)
  new = '%s:%s' % (newrevision, newfilename)
  hunks = cache_diff_hunks(old, new)

  return _translate_lineno_through_hunks(hunks, lineno)
253
254
def hyper_blame(outbuf, ignored, filename, revision):
  """Blames |filename| at |revision|, skipping over the commits in |ignored|.

  A line attributed to an ignored commit is re-attributed to whatever blame
  reports for the approximately corresponding line in that commit's previous
  revision, repeating until a non-ignored commit is reached.

  Args:
    outbuf: Output object that receives the formatted blame table.
    ignored: Container of commit hashes to skip, in the form blame reports
        them.
    filename: File to blame (main passes it relative to the repository root).
    revision: Revision to blame |filename| at.

  Returns:
    0 on success, or the return code of the git command that failed.
  """
  # Map from (commit, filename) to parsed blame of that file at that commit.
  # The filename is part of the key because the blame depends on it as well as
  # on the commit.
  blame_from = {}
  filename = os.path.normpath(filename)
  # Form of the name used only for comparisons. normcase is a no-op except on
  # Windows, where it makes the comparison case- and slash-insensitive; main
  # already passes a normcased name, so git's names must get the same
  # treatment before being compared against it.
  comparable_filename = os.path.normcase(filename)

  def cache_blame_from(filename, commithash):
    key = (commithash, filename)
    try:
      return blame_from[key]
    except KeyError:
      parsed = get_parsed_blame(filename, commithash)
      blame_from[key] = parsed
      return parsed

  try:
    parsed = cache_blame_from(filename, git_common.hash_one(revision))
  except subprocess2.CalledProcessError as e:
    sys.stderr.write(e.stderr.decode())
    return e.returncode

  new_parsed = []

  # We don't show filenames in blame output unless we have to.
  show_filenames = False

  for line in parsed:
    # If a line references an ignored commit, blame that commit's parent
    # repeatedly until we find a non-ignored commit.
    while line.commit.commithash in ignored:
      if line.commit.previous is None:
        # You can't ignore the commit that added this file.
        break

      previouscommit, previousfilename = line.commit.previous.split(' ', 1)
      parent_blame = cache_blame_from(previousfilename, previouscommit)

      if len(parent_blame) == 0:
        # The previous version of this file was empty, therefore, you can't
        # ignore this commit.
        break

      # line.lineno_then is the line number in question at line.commit. We need
      # to translate that line number so that it refers to the position of the
      # same line on previouscommit.
      lineno_previous = approx_lineno_across_revs(
          line.commit.filename, previousfilename, line.commit.commithash,
          previouscommit, line.lineno_then)
      logging.debug('ignore commit %s on line p%d/t%d/n%d',
                    line.commit.commithash, lineno_previous, line.lineno_then,
                    line.lineno_now)

      # Get the line at lineno_previous in the parent commit.
      assert 1 <= lineno_previous <= len(parent_blame)
      newline = parent_blame[lineno_previous - 1]

      # Replace the commit and lineno_then, but not the lineno_now or context.
      line = BlameLine(newline.commit, line.context, newline.lineno_then,
                       line.lineno_now, True)
      logging.debug('    replacing with %r', line)

    # If any line has a different filename to the file's current name, turn on
    # filename display for the entire blame output.
    # Normalize git's name the same way as ours so the comparison is
    # consistent across platforms.
    blamed_filename = os.path.normcase(os.path.normpath(line.commit.filename))
    if blamed_filename != comparable_filename:
      show_filenames = True

    new_parsed.append(line)

  pretty_print(outbuf, new_parsed, show_filenames=show_filenames)

  return 0
325
mgiuca@chromium.orgcd0a1cf2016-02-22 00:40:33 +0000326
def parse_ignore_file(ignore_file):
  """Yields the revisions listed in an ignore file.

  Everything from the first '#' on a line is a comment. Surrounding whitespace
  is stripped, and lines that end up empty are skipped.

  Args:
    ignore_file: Iterable of lines (e.g. an open text file).

  Yields:
    Each remaining revision string, in file order.
  """
  for raw_line in ignore_file:
    revision, _, _ = raw_line.partition('#')
    revision = revision.strip()
    if not revision:
      continue
    yield revision
332
333
def main(args, outbuf):
  """Command-line entry point for git hyper-blame.

  Parses |args|, collects the revisions to ignore (from -i, from
  .git-blame-ignore-revs in the repository root unless --no-default-ignores is
  given, and from --ignore-file), then runs hyper_blame. Changes the current
  directory to the repository root as a side effect.

  Args:
    args: Command-line arguments, without the program name.
    outbuf: Output object that receives the formatted blame table.

  Returns:
    The exit code: hyper_blame's result, or the return code of the git command
    that failed while locating the repository.
  """
  parser = argparse.ArgumentParser(
      prog='git hyper-blame',
      description='git blame with support for ignoring certain commits.')
  parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
                      default=[], help='a revision to ignore')
  parser.add_argument('--ignore-file', metavar='FILE', dest='ignore_file',
                      help='a file containing a list of revisions to ignore')
  parser.add_argument('--no-default-ignores', dest='no_default_ignores',
                      action='store_true',
                      help='Do not ignore commits from .git-blame-ignore-revs.')
  parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
                      help='revision to look at')
  parser.add_argument('filename', metavar='FILE', help='filename to blame')

  args = parser.parse_args(args)
  try:
    repo_root = git_common.repo_root()
  except subprocess2.CalledProcessError as e:
    sys.stderr.write(e.stderr.decode())
    return e.returncode

  # Resolve both user-supplied paths against the caller's working directory
  # *before* leaving it. A relative --ignore-file would otherwise be looked up
  # under the repository root instead of where the user invoked the command.
  filename = os.path.relpath(args.filename, repo_root)
  ignore_file_path = None
  if args.ignore_file:
    ignore_file_path = os.path.abspath(args.ignore_file)

  # cd to the root dir (so all filenames throughout this script are relative
  # to the root).
  os.chdir(repo_root)

  # Normalize filename so we can compare it to other filenames git gives us.
  filename = os.path.normpath(filename)
  filename = os.path.normcase(filename)

  ignored_list = list(args.ignored)
  if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
    with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
      ignored_list.extend(parse_ignore_file(ignore_file))

  if ignore_file_path:
    with open(ignore_file_path) as ignore_file:
      ignored_list.extend(parse_ignore_file(ignore_file))

  # Resolve every entry to a commit hash; unknown revisions only warn.
  ignored = set()
  for c in ignored_list:
    try:
      ignored.add(git_common.hash_one(c))
    except subprocess2.CalledProcessError:
      # Custom warning string (the message from git-rev-parse is inappropriate).
      sys.stderr.write('warning: unknown revision \'%s\'.\n' % c)

  return hyper_blame(outbuf, ignored, filename, args.revision)
mgiuca@chromium.org81937562016-02-03 08:00:53 +0000383
384
# Script entry point: run main() on the command-line arguments and exit with
# its return value.
if __name__ == '__main__':  # pragma: no cover
  setup_color.init()
  # main() writes its table to the object yielded by git_common.less()
  # (presumably a pager's input stream -- confirm in git_common).
  with git_common.less() as less_input:
    sys.exit(main(sys.argv[1:], less_input))
Edward Lemur0d462e92020-01-08 20:11:31 +0000388 sys.exit(main(sys.argv[1:], less_input))