Add README.patches, clone find_patches.py from ffmpeg repository.
Script allows for automatically generating the README.patches file
so we can track local modifications more easily.
BUG=766721
Change-Id: I4170a5b629ded08d6bafe1d0b131527588773059
diff --git a/find_patches.py b/find_patches.py
new file mode 100755
index 0000000..f579fcd
--- /dev/null
+++ b/find_patches.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+#
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+"""Usage: find_patches.py [origin_branch] [> patch_file]
+
+This will find all changes in |origin_branch| that are not part of upstream,
+and print a report. It tries to include deleted lines, though these are
+heuristic at best. If |origin_branch| is omitted, it will default to HEAD.
+
+Changes in the working directory are ignored.
+
+Output will be written to stdout, so you probably want to redirect it.
+
+For example, to generate the patches file for origin/merge-m68:
+find_patches.py origin/merge-m68 > patches.68
+"""
+
+from __future__ import print_function
+import collections
+import os
+import re
+import sys
+import subprocess
+
+# What directory will we look for patches in?
+# TODO(liberato): Should we find the root of the ffmpeg tree?
+PATH = "."
+
+
+def log(str):
+ print("[%s]" % str, file=sys.stderr)
+
+
+def run(command):
+ """ Runs a command and returns stdout.
+
+ Args:
+ command: Array of argv[] entries. E.g., ["path_to_executable", "arg1", ...].
+
+ Returns:
+ stdout as a a string.
+ """
+ return subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
+
+
+class PatchInfo:
+ """ Structure to keep track of one patch in a diff.
+
+ This class encapsulates how to handle inserted / deleted lines in a patch,
+ mostly so that we can decide if we should apply "deleted lines only"
+ processing to any them, to find what commit deleted them. Because deleted
+ lines result in an approximate search, we want to be reasonably sure that
+ any deleted lines aren't actually just changes ("delete old, add new").
+ """
+
+ def __init__(self):
+ # Does a diff insert any lines?
+ self._does_insert = False
+ # Set of lines that a diff deletes.
+ self._deleted_lines = set()
+
+ def record_inserted_line(self, line):
+ """ Records that |line| was inserted as part of the patch.
+
+ |line| is a string from the patch, e.g., "+ foo that was added;"
+ """
+ self._does_insert = True
+
+ def record_deleted_line(self, line):
+ """ Records that |line| was deleted as part of the patch.
+
+ |line| is a string from the patch, e.g., "- foo that was removed;"
+ """
+ self._deleted_lines.add(line)
+
+ def interesting_deleted_lines(self):
+ """ Return the (possibly empty) set of deleted lines that we should track.
+
+ In general, things that remove but also add probably are changes, and
+ can be ignored as noise. While, with perfect deleted line tracking,
+ this wouldn't actually change the result, we really just do a text
+ search for deleted lines later. So, avoiding noise is good.
+
+ Note that this is approximate -- a diff could have deleted and
+ inserted lines near each other, but from different patches. In other
+ words, patch A could delete lines and patch B could add / change them.
+ If those changes end up in the same diff block, then we'll miss A
+ because of this test. However, in practice, checking for both seems
+ to remove some noise.
+ """
+ if self._deleted_lines and not self._does_insert:
+ return self._deleted_lines
+ return set()
+
+
+def main(argv):
+ # Origin branch that contains the patches we want to find.
+ # Can specify, for example "origin/merge-m68" to get the patches file for
+ # that revision, regardless of the state of the working tree.
+ if len(argv) > 1:
+ origin_branch = argv[1]
+ else:
+ origin_branch = "HEAD"
+
+ # Make sure that upstream is up-to-date, else many things will likely not
+ # be reachable from it. We don't do this if run as part of a script.
+ if subprocess.call(["git", "fetch", "upstream"]):
+ raise Exception("Could not fetch from upstream")
+
+ write_patches_file(origin_branch, sys.stdout)
+
+
+def write_patches_file(origin_branch, output_file):
+ """Write the patches file for |origin_branch| to |output_file|."""
+ # Get the latest upstream commit that's reachable from the origin branch.
+ # We'll use that to compare against.
+ upstream = run(["git", "merge-base", "upstream/master",
+ origin_branch]).strip()
+ if not upstream:
+ raise Exception("Could not find upstream commit")
+
+ # "Everything reachable from |origin_branch| but not |upstream|". In other
+ # words, all and only chromium changes. Note that there are non-chromium
+ # authors here, since it will include cherry-picks to origin.
+ revision_range = "%s..%s" % (upstream, origin_branch)
+
+ log("Origin is %s" % origin_branch)
+ log("Upstream is %s" % upstream)
+
+ # Find diffs between the versions, excluding all files that are only on
+ # origin. We explicitly exclude .gitignore, since it exists in both places.
+ # Ask for no context, since we ignore it anyway.
+ diff = run([
+ "git", "diff", "--diff-filter=a", "-U0", revision_range, PATH,
+ ":!.gitignore"
+ ])
+
+ # Set of chromium patch sha1s we've seen.
+ sha1s = set()
+ # Map of sha1 to set of files that it affects.
+ sha1ToFiles = collections.defaultdict(set)
+ # Mapping of filename to set of lines that were deleted.
+ files_to_deleted_lines = {}
+ patch_info = PatchInfo()
+ filename = None
+
+ # Process each diff. Include a dummy line to flush out the last diff.
+ log("Scanning diffs between origin and upstream")
+ for line in diff.splitlines() + ["+++ just to handle deleted lines properly"]:
+ if line.startswith("+++"):
+ # If the previous patch was delete-only, then we need to search for it
+ # differently, since we don't get blame entries for deleted lines.
+ # Add the set of deleted lines to this filename.
+ deleted_lines = patch_info.interesting_deleted_lines()
+ if deleted_lines:
+ files_to_deleted_lines[filename] = deleted_lines
+
+ # Update to the new filename.
+ filename = line[6:]
+ log("Checking diffs in %s" % filename)
+
+ # Start of a new diff. We don't know if it inserts / deletes lines.
+ patch_info = PatchInfo()
+ elif line.startswith("@@"):
+ # @@ -linespec +linespec @@
+ # linespec is either "line_number,number_of_lines" or "line_number".
+ # Extract the "+linespec", which is what was added by |origin|.
+ # If the number of lines is specified as 0, then it's a deletion only.
+ # If the number of lines is unspecified, then it's 1.
+ added_linespec = re.sub(r"^.*\+(.*) @@.*", r"\1", line)
+ # Figure out the lines to blame. This is just "starting_line,+number".
+ if "," in added_linespec:
+ # linespec is "line_number,number_of_lines"
+ added_parts = added_linespec.split(",")
+ # Skip if this is a deletion.
+ if added_parts[1] == "0":
+ continue
+ blame_range = "%s,+%s" % (added_parts[0], added_parts[1])
+ else:
+ # One-line change
+ blame_range = "%s,+1" % added_linespec
+
+ blame = run([
+ "git", "blame", "-l",
+ "-L %s" % blame_range, revision_range, "--", filename
+ ])
+
+ # Collect sha1 lines, and create a mapping of files that is changed by
+ # each sha1.
+ for blame_line in blame.splitlines():
+ sha1 = blame_line.split(" ", 1)[0]
+ if sha1:
+ sha1s.add(sha1)
+ sha1ToFiles[sha1].add(filename)
+ elif line.startswith("---"):
+ # Do nothing. Just avoid matching "---" when we check for "-"
+ pass
+ elif line.startswith("-"):
+ # This diff does delete lines.
+ patch_info.record_deleted_line(line[1:])
+ elif line.startswith("+"):
+ # This diff does insert lines.
+ patch_info.record_inserted_line(line[1:])
+
+ # For all files that have deleted lines, look for the sha1 that deleted them.
+ # This is heuristic only; we're looking for "commits that contain some text".
+ for filename, deleted_lines in files_to_deleted_lines.items():
+ for deleted_line in deleted_lines:
+ # Make sure that the deleted line is long enough to provide context.
+ if len(deleted_line) < 4:
+ continue
+
+ log("Checking for deleted lines in %s" % filename)
+ # Specify "--first-parent" so that we find commits on (presumably) origin.
+ sha1 = run([
+ "git", "log", "-1", revision_range, "--format=%H", "-S", deleted_line,
+ origin_branch, "--", filename
+ ]).strip()
+
+ # Add the sha1 to the sets
+ sha1s.add(sha1)
+ sha1ToFiles[sha1].add(filename)
+
+ # Look up dates from sha1 hashes. We want to output them in a canonical order
+ # so that we can diff easier. Date order seems more convenient that sha1.
+ log("Looking up sha1 dates to sort them")
+ sha1_to_date = {}
+ for sha1 in sha1s:
+ date = run(["git", "log", "-1", "--format=%at", "%s" % sha1]).strip()
+ sha1_to_date[sha1] = date
+
+ # Print the patches file.
+ log("Writing patch file")
+ print(
+ "---------------------------------------------------------------------",
+ file=output_file)
+ print(
+ "-- Chromium Patches. Autogenerated by " + os.path.basename(__file__) +
+ ", do not edit --",
+ file=output_file)
+ print(
+ "---------------------------------------------------------------------",
+ file=output_file)
+ print("\n", file=output_file)
+ wd = os.getcwd()
+ for sha1, date in sorted(sha1_to_date.iteritems(), key=lambda (k, v): v):
+ print(
+ "------------------------------------------------------------------",
+ file=output_file)
+ for line in run(["git", "log", "-1", "%s" % sha1]).splitlines():
+ print(line.rstrip(), file=output_file)
+ print("\nAffects:", file=output_file)
+ # TODO(liberato): maybe add the lines that were affected.
+ for file in sorted(sha1ToFiles[sha1]):
+ print(" " + os.path.relpath(file, wd), file=output_file)
+ print(file=output_file)
+
+ log("Done")
+
+
+if __name__ == "__main__":
+ main(sys.argv)