blob: 663fd6c6ee084e9e90ee796f5cd17c42ba890fc5 [file] [log] [blame]
Avi Drissmanea62cfa2023-01-24 14:57:29 -05001#!/usr/bin/env python3
Dale Curtis417709d2018-11-19 15:46:52 -08002#
Avi Drissmanea62cfa2023-01-24 14:57:29 -05003# Copyright 2018 The Chromium Authors
Dale Curtis417709d2018-11-19 15:46:52 -08004# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6#
7"""Usage: find_patches.py [origin_branch] [> patch_file]
8
9This will find all changes in |origin_branch| that are not part of upstream,
10and print a report. It tries to include deleted lines, though these are
11heuristic at best. If |origin_branch| is omitted, it will default to HEAD.
12
13Changes in the working directory are ignored.
14
15Output will be written to stdout, so you probably want to redirect it.
16
17For example, to generate the patches file for origin/merge-m68:
18find_patches.py origin/merge-m68 > patches.68
19"""
20
Dale Curtis417709d2018-11-19 15:46:52 -080021import collections
22import os
23import re
24import sys
25import subprocess
26
# What directory will we look for patches in? This value is handed to
# "git diff" as the pathspec, so "." restricts the search to the directory the
# script is run from (and everything below it).
# TODO(liberato): Should we find the root of the ffmpeg tree?
PATH = "."
30
31
def log(str):
    """Print |str| to stderr, wrapped in square brackets.

    Progress messages go to stderr so that they do not mix with anything
    written to stdout.

    Args:
      str: The message to print. The name shadows the builtin, but it is kept
        so that existing callers (including keyword callers) keep working.
    """
    # Wrap the argument in a one-element tuple. A bare "%" would treat a tuple
    # message as multiple format values and raise TypeError.
    print("[%s]" % (str,), file=sys.stderr)
34
35
def run(command):
    """Execute |command| and hand back what it wrote to stdout.

    The child's exit status is not inspected, and its stderr is not captured.

    Args:
      command: List of argv[] entries, e.g. ["path_to_executable", "arg1"].

    Returns:
      The child's stdout, decoded to a string with the default codec.
    """
    child = subprocess.Popen(command, stdout=subprocess.PIPE)
    # communicate() drains the pipe and waits for the child to exit.
    raw_stdout, _ = child.communicate()
    return raw_stdout.decode()
Dale Curtis417709d2018-11-19 15:46:52 -080046
47
class PatchInfo:
    """Bookkeeping for the inserted / deleted lines of one diff.

    Deleted lines can only be traced back to a commit with an approximate
    text search, so we only want to do that for diffs that purely delete.
    A diff that both deletes and inserts is most likely a change ("delete
    old, add new") and is left to the blame-based lookup instead.
    """

    def __init__(self):
        # Every distinct line the diff removes.
        self._deleted_lines = set()
        # Becomes True as soon as the diff is seen to add any line.
        self._does_insert = False

    def record_inserted_line(self, line):
        """Note that the diff inserted |line|.

        |line| is a string from the patch, e.g., "+ foo that was added;".
        Only the fact that an insertion happened is remembered; the text
        itself is not stored.
        """
        self._does_insert = True

    def record_deleted_line(self, line):
        """Note that the diff deleted |line|.

        |line| is a string from the patch, e.g., "- foo that was removed;".
        """
        self._deleted_lines.add(line)

    def interesting_deleted_lines(self):
        """Return the (possibly empty) set of deleted lines worth tracking.

        Deleted lines are reported only when the diff inserted nothing.
        Diffs that remove and add are probably edits, and searching for
        their removed text later would mostly produce noise.

        This is approximate: one diff can contain deletions from patch A
        and insertions from patch B, in which case A is missed. In
        practice the filter still removes more noise than it costs.
        """
        if self._does_insert or not self._deleted_lines:
            return set()
        return self._deleted_lines
96
97
def main(argv):
    """Fetch upstream, then write the patches report to stdout.

    Args:
      argv: Command line in sys.argv form. argv[1], when present, names the
        origin branch that contains the patches to find (for example
        "origin/merge-m68"), regardless of the state of the working tree.
        Without it, "HEAD" is used.

    Raises:
      Exception: if "git fetch upstream" exits with a non-zero status.
    """
    origin_branch = argv[1] if len(argv) > 1 else "HEAD"

    # Bring upstream up-to-date first, else many things will likely not be
    # reachable from it. Code that calls write_patches_file() directly does
    # not get this fetch.
    fetch_status = subprocess.call(["git", "fetch", "upstream"])
    if fetch_status != 0:
        raise Exception("Could not fetch from upstream")

    write_patches_file(origin_branch, sys.stdout)
113
114
def write_patches_file(origin_branch, output_file):
    """Write the patches file for |origin_branch| to |output_file|.

    Diffs |origin_branch| against its merge base with upstream/master, then
    attributes each changed region to a commit: inserted lines via
    "git blame", delete-only diffs via a text search ("git log -S"). Each
    commit found is printed with its full log entry and the files it affects,
    ordered by author date.

    Args:
      origin_branch: Revision to examine, e.g. "HEAD" or "origin/merge-m68".
      output_file: Writable text file object that receives the report.

    Raises:
      Exception: if no merge base with upstream/master can be found.
    """
    # Get the latest upstream commit that's reachable from the origin branch.
    # We'll use that to compare against.
    upstream = run(["git", "merge-base", "upstream/master",
                    origin_branch]).strip()
    if not upstream:
        raise Exception("Could not find upstream commit")

    # "Everything reachable from |origin_branch| but not |upstream|". In other
    # words, all and only chromium changes. Note that there are non-chromium
    # authors here, since it will include cherry-picks to origin.
    revision_range = "%s..%s" % (upstream, origin_branch)

    log("Origin is %s" % origin_branch)
    log("Upstream is %s" % upstream)

    # Find diffs between the versions, excluding all files that are only on
    # origin. We explicitly exclude .gitignore, since it exists in both
    # places. Ask for no context, since we ignore it anyway.
    diff = run([
        "git", "diff", "--diff-filter=a", "-U0", revision_range, PATH,
        ":!.gitignore"
    ])

    # Set of chromium patch sha1s we've seen.
    sha1s = set()
    # Map of sha1 to set of files that it affects.
    sha1_to_files = collections.defaultdict(set)
    # Mapping of filename to set of lines that were deleted.
    files_to_deleted_lines = {}
    patch_info = PatchInfo()
    filename = None

    # Process each diff. Include a dummy line to flush out the last diff.
    log("Scanning diffs between origin and upstream")
    for line in diff.splitlines() + ["+++ just to handle deleted lines properly"]:
        # git always writes file headers as "+++ " / "--- " followed by the
        # path. Requiring the trailing space keeps added or removed content
        # such as "++i;" or "--i;" from being mistaken for a header. Content
        # that itself starts with "++ " or "-- " is still misread, so this
        # remains a heuristic.
        if line.startswith("+++ "):
            # If the previous patch was delete-only, then we need to search
            # for it differently, since we don't get blame entries for
            # deleted lines. Add the set of deleted lines to this filename.
            deleted_lines = patch_info.interesting_deleted_lines()
            if deleted_lines:
                files_to_deleted_lines[filename] = deleted_lines

            # Update to the new filename, dropping the leading "+++ b/".
            filename = line[6:]
            log("Checking diffs in %s" % filename)

            # Start of a new diff. We don't know if it inserts / deletes
            # lines.
            patch_info = PatchInfo()
        elif line.startswith("@@"):
            # @@ -linespec +linespec @@
            # linespec is either "line_number,number_of_lines" or
            # "line_number". Extract the "+linespec", which is what was added
            # by |origin|. If the number of lines is specified as 0, then
            # it's a deletion only. If it is unspecified, then it's 1.
            added_linespec = re.sub(r"^.*\+(.*) @@.*", r"\1", line)
            # Figure out the lines to blame: "starting_line,+number".
            if "," in added_linespec:
                added_parts = added_linespec.split(",")
                # Skip if this is a deletion; there is nothing to blame.
                if added_parts[1] == "0":
                    continue
                blame_range = "%s,+%s" % (added_parts[0], added_parts[1])
            else:
                # One-line change.
                blame_range = "%s,+1" % added_linespec

            blame = run([
                "git", "blame", "-l",
                "-L %s" % blame_range, revision_range, "--", filename
            ])

            # The first space-separated field of each blame line is the
            # sha1. Record it, along with the file it touches.
            for blame_line in blame.splitlines():
                sha1 = blame_line.split(" ", 1)[0]
                if sha1:
                    sha1s.add(sha1)
                    sha1_to_files[sha1].add(filename)
        elif line.startswith("--- "):
            # Do nothing. Just avoid matching the header when we check "-".
            pass
        elif line.startswith("-"):
            # This diff does delete lines.
            patch_info.record_deleted_line(line[1:])
        elif line.startswith("+"):
            # This diff does insert lines.
            patch_info.record_inserted_line(line[1:])

    # For all files that have deleted lines, look for the sha1 that deleted
    # them. This is heuristic only; we're looking for "commits that contain
    # some text".
    for filename, deleted_lines in files_to_deleted_lines.items():
        for deleted_line in deleted_lines:
            # Make sure that the deleted line is long enough to provide
            # context.
            if len(deleted_line) < 4:
                continue

            log("Checking for deleted lines in %s" % filename)
            # Ask for the most recent commit in the range whose change adds
            # or removes |deleted_line| in this file.
            # NOTE(review): an earlier comment here said "--first-parent" is
            # specified, but the flag is not in the argument list. Confirm
            # whether it should be.
            sha1 = run([
                "git", "log", "-1", revision_range, "--format=%H", "-S",
                deleted_line, origin_branch, "--", filename
            ]).strip()

            # No commit matched. Recording the empty string would emit a
            # bogus, blank entry in the report.
            if not sha1:
                continue

            # Add the sha1 to the sets.
            sha1s.add(sha1)
            sha1_to_files[sha1].add(filename)

    # Look up dates from sha1 hashes. We want to output them in a canonical
    # order so that we can diff easier. Date order seems more convenient than
    # sha1.
    log("Looking up sha1 dates to sort them")
    sha1_to_date = {}
    for sha1 in sha1s:
        date = run(["git", "log", "-1", "--format=%at", "%s" % sha1]).strip()
        sha1_to_date[sha1] = date

    def report_order(item):
        # Compare timestamps as numbers, not text, and break ties on the sha1
        # so that commits with equal dates always come out in the same order.
        sha1, date = item
        return (int(date) if date.isdigit() else 0, sha1)

    # Print the patches file.
    log("Writing patch file")
    print(
        "---------------------------------------------------------------------",
        file=output_file)
    print(
        "-- Chromium Patches. Autogenerated by " + os.path.basename(__file__) +
        ", do not edit --",
        file=output_file)
    print(
        "---------------------------------------------------------------------",
        file=output_file)
    print("\n", file=output_file)
    wd = os.getcwd()
    for sha1, _ in sorted(sha1_to_date.items(), key=report_order):
        print(
            "------------------------------------------------------------------",
            file=output_file)
        for line in run(["git", "log", "-1", "%s" % sha1]).splitlines():
            print(line.rstrip(), file=output_file)
        print("\nAffects:", file=output_file)
        # TODO(liberato): maybe add the lines that were affected.
        for file in sorted(sha1_to_files[sha1]):
            # Normalize to forward slashes so the output matches on Windows.
            relfile = os.path.relpath(file, wd).replace('\\', '/')
            print("    " + relfile, file=output_file)
        print(file=output_file)

    log("Done")
262
263
# Run the tool only when executed as a script, not when imported as a module.
# The full argument vector is passed so main() can read the optional branch.
if __name__ == "__main__":
    main(sys.argv)