findmissing: Cleaned regex queries

Removed code duplication by moving the search_usha function into the
common file (common.py), where it is named search_upstream_sha.

Cleaned up unused REGEX statements.

Searching for the upstream commit sha in a commit message was previously
done line by line. It now takes the entire description and finds what it
needs.

BUG=None
TEST=None

Change-Id: I1c185bf9905278b36aacb9f8ba5af5f091a0198a
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2121362
Reviewed-by: Curtis Malainey <cujomalainey@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Commit-Queue: Hirthanan Subenderan <hirthanan@google.com>
Tested-by: Hirthanan Subenderan <hirthanan@google.com>
diff --git a/contrib/findmissing/common.py b/contrib/findmissing/common.py
index 2df97af..6bf8cd3 100755
--- a/contrib/findmissing/common.py
+++ b/contrib/findmissing/common.py
@@ -11,6 +11,7 @@
 import os
 import re
 from enum import Enum
+import subprocess
 import MySQLdb
 
 import initdb_upstream
@@ -39,12 +40,6 @@
 GIT_COOKIE_PATH = os.path.join(WORKDIR, '.git-credential-cache/cookie')
 
 
-# "commit" is sometimes seen multiple times, such as with commit 6093aabdd0ee
-CHERRYPICK = re.compile(r'cherry picked from (commit )+([0-9a-f]+)')
-STABLE = re.compile(r'^\s*(commit )+([a-f0-9]+) upstream')
-STABLE2 = re.compile(r'^\s*\[\s*Upstream (commit )+([0-9a-f]+)\s*\]')
-
-
 class Status(Enum):
     """Text representation of database enum to track status of gerrit CL."""
     OPEN = 1 # Gerrit ticket was created for clean fix patch
@@ -90,6 +85,26 @@
     """Chromeos branch name"""
     return 'chromeos-%s' % version
 
+def search_upstream_sha(kernel_sha):
+    """Search for upstream sha that kernel_sha is cherry-picked from.
+
+    The whole `git show -s` description of kernel_sha is scanned at once.
+    If found, return upstream_sha (first 12 characters), otherwise return None.
+    """
+    desc = subprocess.check_output(['git', 'show', '-s', kernel_sha],
+                                   encoding='utf-8', errors='ignore')
+
+    # "commit" is sometimes seen multiple times, such as with commit 6093aabdd0ee
+    patterns = (r'cherry picked from (commit )+([0-9a-f]+)',
+                r'^\s*(commit )+([a-f0-9]+) upstream',
+                r'^\s*\[\s*Upstream (commit )+([0-9a-f]+)\s*\]')
+    for pattern in patterns:
+        # The patch may have been picked multiple times; only record the first
+        # entry. re.search returns a match object (re.findall returns a list).
+        m = re.search(pattern, desc, re.M)
+        if m:
+            return m.group(2)[:12]
+    return None
 
 def patch_link(changeID):
     """Link to patch on gerrit"""