blob: e335c788cae51c643c8618931ecf7fcd245af37d [file] [log] [blame]
Josip Sokcevic4de5dea2022-03-23 21:15:14 +00001#!/usr/bin/env python3
mgiuca@chromium.org81937562016-02-03 08:00:53 +00002# Copyright 2016 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
mgiuca@chromium.org81937562016-02-03 08:00:53 +00005"""Wrapper around git blame that ignores certain commits.
6"""
7
mgiuca@chromium.org81937562016-02-03 08:00:53 +00008import argparse
9import collections
10import logging
11import os
12import subprocess2
13import sys
14
15import git_common
16import git_dates
mgiuca@chromium.org63906ba2016-04-29 01:43:32 +000017import setup_color
mgiuca@chromium.org81937562016-02-03 08:00:53 +000018
# Set the root logger's threshold to INFO at import time. The logging.debug()
# calls further down in this file are below that threshold.
logging.getLogger().setLevel(logging.INFO)

# Name of the ignore file that main() reads (relative to the repository root)
# unless --no-default-ignores is given.
DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
22
23
class Commit(object):
    """Metadata for a single commit referenced by blame output.

    Only the hash is known at construction time. Every other attribute starts
    out as None and is overwritten by parse_blame(), which copies each header
    line of the porcelain output onto the object with setattr().
    """

    # Attributes that exist on every instance and default to None. The order
    # matches the order in which they are assigned.
    _HEADER_FIELDS = (
        'author',
        'author_mail',
        'author_time',
        'author_tz',
        'committer',
        'committer_mail',
        'committer_time',
        'committer_tz',
        'summary',
        'boundary',
        'previous',
        'filename',
    )

    def __init__(self, commithash):
        self.commithash = commithash
        for field in self._HEADER_FIELDS:
            setattr(self, field, None)

    def __repr__(self):  # pragma: no cover
        return '<Commit %s>' % self.commithash
mgiuca@chromium.org81937562016-02-03 08:00:53 +000043
44
# One line of blame output:
#   commit:      the Commit object the line is attributed to.
#   context:     the text of the line.
#   lineno_then: the line's number in the file as of |commit|.
#   lineno_now:  the line's number in the file being blamed.
#   modified:    True when hyper_blame() re-attributed the line to a different
#                commit because the original one was ignored.
BlameLine = collections.namedtuple(
    'BlameLine',
    ['commit', 'context', 'lineno_then', 'lineno_now', 'modified'],
)
mgiuca@chromium.org81937562016-02-03 08:00:53 +000047
48
def parse_blame(blameoutput):
    """Parses the output of git blame -p, yielding one BlameLine per line.

    Each entry in the porcelain output consists of a header line
    ("<hash> <lineno_then> <lineno_now> ..."), any number of "key value"
    detail lines, and finally the line's text prefixed with a tab.

    Lines that share a commit hash share a single Commit object; detail lines
    are stored on it as attributes (with '-' replaced by '_'), and a detail
    line without a value is stored as True.
    """
    rows = iter(blameoutput.split('\n'))
    commits = {}

    for header in rows:
        if not header.strip():
            continue

        # Header line: commit hash followed by the two line numbers.
        fields = header.split()
        commithash = fields[0]
        lineno_then = int(fields[1])
        lineno_now = int(fields[2])

        commit = commits.get(commithash)
        if commit is None:
            commit = commits[commithash] = Commit(commithash)

        # Consume detail lines from the same iterator until the tab-prefixed
        # context line shows up. |last| always holds the most recently read
        # line, so it is the context line when the loop breaks.
        last = header
        for last in rows:
            if last.startswith('\t'):
                break
            key, sep, value = last.partition(' ')
            setattr(commit, key.replace('-', '_'), value if sep else True)

        # Drop the leading tab.
        yield BlameLine(commit, last[1:], lineno_then, lineno_now, False)
mgiuca@chromium.org81937562016-02-03 08:00:53 +000089
90
def print_table(outbuf, table, align):
    """Writes a 2D rectangular array to |outbuf|, padding columns with spaces.

    Args:
        outbuf: object whose write() accepts bytes; each row is written as one
            UTF-8 encoded line.
        table: sequence of rows, each a sequence of strings.
        align: string of 'l' and 'r', designating whether each column is
            left- or right-aligned.
    """
    if not table:
        return

    # Column widths: start from the first row, widen with every later row.
    row_iter = iter(table)
    widths = [len(cell) for cell in next(row_iter)]
    for row in row_iter:
        widths = [max(widths[col], len(cell)) for col, cell in enumerate(row)]

    for row in table:
        last_col = len(row) - 1
        encoded = []
        for col, cell in enumerate(row):
            fill = ' ' * (widths[col] - len(cell))
            if align[col] == 'r':
                text = fill + cell
            elif col < last_col:
                text = cell + fill
            else:
                # A left-aligned final column gets no trailing padding.
                text = cell
            encoded.append(text.encode('utf-8', 'replace'))
        try:
            outbuf.write(b' '.join(encoded) + b'\n')
        except IOError:  # pragma: no cover
            # Can happen on Windows if the pipe is closed early.
            pass
mgiuca@chromium.org81937562016-02-03 08:00:53 +0000123
124
def pretty_print(outbuf, parsedblame, show_filenames=False):
    """Pretty-prints the output of parse_blame to |outbuf| as a table.

    Each row holds the abbreviated commit hash, optionally the commit's
    filename, the author, the author date, the current line number (followed
    by '*' when the line was re-attributed) and the line's text.
    """
    rows = []
    for entry in parsedblame:
        commit = entry.commit
        when = git_dates.timestamp_offset_to_datetime(commit.author_time,
                                                      commit.author_tz)
        marker = '*' if entry.modified else ''

        cells = [commit.commithash[:8]]
        if show_filenames:
            cells.append(commit.filename)
        cells.extend([
            '(' + commit.author,
            git_dates.datetime_string(when),
            str(entry.lineno_now) + marker + ')',
            entry.context,
        ])
        rows.append(cells)

    # One extra left-aligned column when filenames are shown.
    alignment = 'llllrl' if show_filenames else 'lllrl'
    print_table(outbuf, rows, align=alignment)
mgiuca@chromium.org81937562016-02-03 08:00:53 +0000141
142
def get_parsed_blame(filename, revision='HEAD'):
    """Blames |filename| at |revision| and returns a list of BlameLine."""
    porcelain_output = git_common.blame(filename,
                                        revision=revision,
                                        porcelain=True)
    parsed_lines = parse_blame(porcelain_output)
    return [*parsed_lines]
mgiuca@chromium.org81937562016-02-03 08:00:53 +0000146
147
# Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
# only the hunk line numbers, not the actual diff contents).
# hunk list contains (old, new) pairs, where old and new are (start, length)
# pairs. Nothing in this file ever stores None here; if the diff fails, no
# entry is added.
diff_hunks_cache = {}


def cache_diff_hunks(oldrev, newrev):
    """Returns the hunks of `git diff -U0 oldrev newrev`, using a cache.

    Args:
        oldrev: The "old" side of the diff, as accepted by git diff.
        newrev: The "new" side of the diff.

    Returns:
        A list of ((oldstart, oldlength), (newstart, newlength)) tuples, one
        per "@@" hunk header, in the order they appear in the diff. The same
        list object is returned on every call for a given (oldrev, newrev).
    """
    def parse_start_length(s):
        # Chop the '-' or '+'.
        s = s[1:]
        # Length is optional (defaults to 1).
        try:
            start, length = s.split(',')
        except ValueError:
            start = s
            length = 1
        return int(start), int(length)

    try:
        return diff_hunks_cache[(oldrev, newrev)]
    except KeyError:
        pass

    # Use -U0 to get the smallest possible hunks.
    diff = git_common.diff(oldrev, newrev, '-U0')

    # Get all the hunks. A header looks like "@@ -a,b +c,d @@ ...", so the
    # second and third space-separated fields are the old and new ranges.
    hunks = []
    for line in diff.split('\n'):
        if not line.startswith('@@'):
            continue
        ranges = line.split(' ', 3)[1:3]
        ranges = tuple(parse_start_length(r) for r in ranges)
        hunks.append(ranges)

    diff_hunks_cache[(oldrev, newrev)] = hunks
    return hunks


def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
    """Computes the approximate movement of a line number between two revisions.

    Consider line |lineno| in |filename| at |revision|. This function computes
    the line number of that line in |newfilename| at |newrevision|. This is
    necessarily approximate.

    Args:
        filename: The file (within the repo) at |revision|.
        newfilename: The name of the same file at |newrevision|.
        revision: A git revision.
        newrevision: Another git revision. Note: Can be ahead or behind
            |revision|.
        lineno: Line number within |filename| at |revision|.

    Returns:
        Line number within |newfilename| at |newrevision|. A line outside
        every hunk is shifted by the net size change of the hunks that lie
        entirely before it. A line inside a hunk is moved with the hunk and
        clamped to the hunk's new range.
    """
    # This doesn't work that well if there are a lot of line changes within the
    # hunk (demonstrated by
    # GitHyperBlameLineMotionTest.testIntraHunkLineMotion). A fuzzy heuristic
    # that takes the text of the new line and tries to find a deleted line
    # within the hunk that mostly matches the new line could help.

    # Use the <revision>:<filename> syntax to diff between two blobs. This is
    # the only way to diff a file that has been renamed.
    old = '%s:%s' % (revision, filename)
    new = '%s:%s' % (newrevision, newfilename)
    hunks = cache_diff_hunks(old, new)

    # Net line-count change of the hunks that lie entirely before lineno.
    cumulative_offset = 0

    # Find the hunk containing lineno (if any). Hunks are visited in file
    # order.
    for (oldstart, oldlength), (newstart, newlength) in hunks:
        # For an empty old range (a pure insertion), oldstart is the line
        # *before* the insertion point, so the first line the hunk pushes down
        # is oldstart + 1.
        first_affected = oldstart + 1 if oldlength == 0 else oldstart

        if lineno < first_affected:
            # Gone too far. This hunk (and every later one) is after lineno,
            # so it must not contribute to the offset.
            break

        if lineno >= first_affected + oldlength:
            # Not there yet: the hunk is entirely before lineno, so lineno
            # moves by the hunk's change in length.
            cumulative_offset += newlength - oldlength
            continue

        # lineno is in [oldstart, oldstart + oldlength) at revision, which
        # corresponds to [newstart, newstart + newlength) at newrevision.

        # If newlength == 0, newstart will be the line before the deleted hunk.
        # Since the line must have been deleted, just return that as the nearest
        # line in the new file. Caution: newstart can be 0 in this case.
        if newlength == 0:
            return max(1, newstart)

        newend = newstart + newlength - 1

        # Move lineno based on the amount the entire hunk shifted.
        lineno = lineno + newstart - oldstart
        # Constrain the output within the range [newstart, newend].
        return min(newend, max(newstart, lineno))

    # Wasn't in a hunk. Figure out the line motion based on the difference in
    # length of the hunks that precede it.
    return lineno + cumulative_offset
mgiuca@chromium.org01d2cde2016-02-05 03:25:41 +0000251
252
def hyper_blame(outbuf, ignored, filename, revision):
    """Blames |filename| at |revision|, looking through ignored commits.

    Every line attributed to a commit in |ignored| is re-attributed to
    whichever commit the corresponding line is blamed on in the commit's
    `previous` revision, repeatedly, until a non-ignored commit is reached or
    there is no earlier version of the file to look at.

    Args:
        outbuf: Output buffer handed to pretty_print().
        ignored: Container of full commit hashes to skip (only membership is
            tested).
        filename: Path of the file to blame.
        revision: Revision at which to start the blame.

    Returns:
        0 on success, or git's return code if the initial blame fails (git's
        stderr is forwarded to sys.stderr in that case).
    """
    # Map from (filename, commit hash) to the parsed blame of that file at
    # that commit. The filename is part of the key because the blame depends
    # on it: the same commit may be reached through different paths.
    blame_from = {}
    filename = os.path.normpath(filename)

    def cache_blame_from(filename, commithash):
        key = (filename, commithash)
        try:
            return blame_from[key]
        except KeyError:
            parsed = get_parsed_blame(filename, commithash)
            blame_from[key] = parsed
            return parsed

    try:
        parsed = cache_blame_from(filename, git_common.hash_one(revision))
    except subprocess2.CalledProcessError as e:
        sys.stderr.write(e.stderr.decode())
        return e.returncode

    new_parsed = []

    # We don't show filenames in blame output unless we have to.
    show_filenames = False

    for line in parsed:
        # If a line references an ignored commit, blame that commit's parent
        # repeatedly until we find a non-ignored commit.
        while line.commit.commithash in ignored:
            if line.commit.previous is None:
                # You can't ignore the commit that added this file.
                break

            # The `previous` header is "<commit hash> <filename>".
            previouscommit, previousfilename = line.commit.previous.split(
                ' ', 1)
            parent_blame = cache_blame_from(previousfilename, previouscommit)

            if not parent_blame:
                # The previous version of this file was empty, therefore, you
                # can't ignore this commit.
                break

            # line.lineno_then is the line number in question at line.commit. We
            # need to translate that line number so that it refers to the
            # position of the same line on previouscommit.
            lineno_previous = approx_lineno_across_revs(line.commit.filename,
                                                        previousfilename,
                                                        line.commit.commithash,
                                                        previouscommit,
                                                        line.lineno_then)
            logging.debug('ignore commit %s on line p%d/t%d/n%d',
                          line.commit.commithash, lineno_previous,
                          line.lineno_then, line.lineno_now)

            # Get the line at lineno_previous in the parent commit.
            assert 1 <= lineno_previous <= len(parent_blame)
            newline = parent_blame[lineno_previous - 1]

            # Replace the commit and lineno_then, but not the lineno_now or
            # context.
            line = BlameLine(newline.commit, line.context, newline.lineno_then,
                             line.lineno_now, True)
            logging.debug(' replacing with %r', line)

        # If any line has a different filename to the file's current name, turn
        # on filename display for the entire blame output. Use normpath to make
        # variable consistent across platforms.
        if os.path.normpath(line.commit.filename) != filename:
            show_filenames = True

        new_parsed.append(line)

    pretty_print(outbuf, new_parsed, show_filenames=show_filenames)

    return 0
mgiuca@chromium.org81937562016-02-03 08:00:53 +0000327
mgiuca@chromium.orgcd0a1cf2016-02-22 00:40:33 +0000328
def parse_ignore_file(ignore_file):
    """Yields the entries of an ignore file, one per non-empty line.

    Everything from the first '#' on a line is treated as a comment, and
    surrounding whitespace is stripped. Lines that end up empty are skipped.

    Args:
        ignore_file: Iterable of text lines (e.g. an open file).
    """
    for raw in ignore_file:
        entry, _, _ = raw.partition('#')
        entry = entry.strip()
        if not entry:
            continue
        yield entry
mgiuca@chromium.orgcd0a1cf2016-02-22 00:40:33 +0000334
335
def main(args, outbuf):
    """Command-line entry point for git hyper-blame.

    Parses |args|, changes the working directory to the repository root,
    collects the revisions to ignore (-i options, the default ignore file
    unless --no-default-ignores is given, and --ignore-file) and runs
    hyper_blame().

    Args:
        args: Command-line arguments, without the program name.
        outbuf: Output buffer handed on to hyper_blame().

    Returns:
        The value returned by hyper_blame(), or git's return code if the
        repository root cannot be determined.
    """
    parser = argparse.ArgumentParser(
        prog='git hyper-blame',
        description='git blame with support for ignoring certain commits.')
    parser.add_argument('-i',
                        metavar='REVISION',
                        action='append',
                        dest='ignored',
                        default=[],
                        help='a revision to ignore')
    parser.add_argument('--ignore-file',
                        metavar='FILE',
                        dest='ignore_file',
                        help='a file containing a list of revisions to ignore')
    parser.add_argument(
        '--no-default-ignores',
        dest='no_default_ignores',
        action='store_true',
        help='Do not ignore commits from .git-blame-ignore-revs.')
    parser.add_argument('revision',
                        nargs='?',
                        default='HEAD',
                        metavar='REVISION',
                        help='revision to look at')
    parser.add_argument('filename', metavar='FILE', help='filename to blame')
    options = parser.parse_args(args)

    try:
        repo_root = git_common.repo_root()
    except subprocess2.CalledProcessError as e:
        sys.stderr.write(e.stderr.decode())
        return e.returncode

    # Make filename relative to the repository root and normalize it so it can
    # be compared to the filenames git gives us. The relative path has to be
    # computed before changing directory, since it depends on the cwd.
    filename = os.path.normcase(
        os.path.normpath(os.path.relpath(options.filename, repo_root)))
    # cd to the root dir, so all filenames throughout this script are relative
    # to the root.
    os.chdir(repo_root)

    # Ignore files to read, in order: the default one (if enabled and
    # present), then the one named on the command line.
    ignore_paths = []
    if (not options.no_default_ignores
            and os.path.exists(DEFAULT_IGNORE_FILE_NAME)):
        ignore_paths.append(DEFAULT_IGNORE_FILE_NAME)
    if options.ignore_file:
        ignore_paths.append(options.ignore_file)

    revisions = list(options.ignored)
    for path in ignore_paths:
        with open(path) as ignore_file:
            revisions.extend(parse_ignore_file(ignore_file))

    # Resolve every revision to a full hash; unknown ones only get a warning.
    ignored = set()
    for rev in revisions:
        try:
            ignored.add(git_common.hash_one(rev))
        except subprocess2.CalledProcessError:
            # Custom warning string (the message from git-rev-parse is
            # inappropriate).
            sys.stderr.write('warning: unknown revision \'%s\'.\n' % rev)

    return hyper_blame(outbuf, ignored, filename, options.revision)
mgiuca@chromium.org81937562016-02-03 08:00:53 +0000397
398
if __name__ == '__main__':  # pragma: no cover
    # Script entry point: main() writes its output to the object yielded by
    # git_common.less() (presumably a pipe into a pager -- confirm in
    # git_common), and its return value becomes the process exit status.
    setup_color.init()
    with git_common.less() as less_input:
        sys.exit(main(sys.argv[1:], less_input))
402 sys.exit(main(sys.argv[1:], less_input))