devserver: make artifact names globs by default

What this means is that, by default, we'll be using a glob expression
(i.e. a path that may contain shell-style wildcards) instead of a regex
to identify artifacts. We are still allowing artifacts to be defined by
a regex, as this is useful for some purposes such as disallowing certain
file name patterns; these semantics, however, need to be explicitly
selected via a flag.

Some additional changes included:

* Eliminate enforcing a single file in gsutil_util.GetGSNamesWithWait();
  this is already done in the caller function and there's no need to
  complicate the otherwise generic logic in this module.

* Have DeltaPayloadsArtifact override various name filtering parameters
  internally; letting the user define these may cause inconsistencies
  (especially given the glob vs regex semantics), makes it less robust
  and is not needed in practice.

* Fixes passing of dictionary args (as opposed to list args) to artifact
  objects via artifact factory.

* Fixes a great many gpylint errors/warnings.

BUG=chromium:280220
TEST=Unit tests
TEST=gpylint

Change-Id: I9ea59b7f8962a71f7b387b99b0577932e753c2eb
Reviewed-on: https://chromium-review.googlesource.com/167434
Reviewed-by: Gilad Arnold <garnold@chromium.org>
Tested-by: Gilad Arnold <garnold@chromium.org>
Commit-Queue: Gilad Arnold <garnold@chromium.org>
diff --git a/gsutil_util.py b/gsutil_util.py
index 22d3fe5..3cc884a 100644
--- a/gsutil_util.py
+++ b/gsutil_util.py
@@ -5,7 +5,8 @@
 """Module containing gsutil helper methods."""
 
 import distutils.version
-import logging
+import fnmatch
+import os
 import random
 import re
 import subprocess
@@ -39,15 +40,18 @@
 
   Attempts are tried with exponential backoff.
 
+  Args:
+    cmd: a string containing the gsutil command to run.
+    err_msg: string prepended to the exception thrown in case of a failure.
   Returns:
     stdout of the called gsutil command.
   Raises:
-    subprocess.CalledProcessError if all attempt to run gsutil cmd fails.
+    GSUtilError: if all attempts to run gsutil have failed.
   """
   proc = None
   sleep_timeout = 1
   stderr = None
-  for _attempt in range(GSUTIL_ATTEMPTS):
+  for _ in range(GSUTIL_ATTEMPTS):
     proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
     stdout, stderr = proc.communicate()
@@ -72,6 +76,9 @@
 def DownloadFromGS(src, dst):
   """Downloads object from gs_url |src| to |dst|.
 
+  Args:
+    src: source file on GS that needs to be downloaded.
+    dst: file to copy the source file to.
   Raises:
     GSUtilError: if an error occurs during the download.
   """
@@ -80,79 +87,69 @@
   GSUtilRun(cmd, msg)
 
 
-def _GetGSNamesFromList(filename_list, pattern):
-  """Given a list of filenames, returns the filenames that match pattern."""
-  matches = []
-  re_pattern = re.compile(pattern)
-  for filename in filename_list:
-    if re_pattern.match(filename):
-      matches.append(filename)
-
-  return matches
+def _GlobHasWildcards(pattern):
+  """Returns True if a glob pattern contains any wildcards."""
+  return len(pattern) > len(pattern.translate(None, '*.[]'))
 
 
-def GetGSNamesWithWait(pattern, archive_url, err_str, single_item=True,
-                       timeout=600, delay=10):
+def GetGSNamesWithWait(pattern, archive_url, err_str, timeout=600, delay=10,
+                       is_regex_pattern=False):
   """Returns the google storage names specified by the given pattern.
 
   This method polls Google Storage until the target artifacts specified by the
   pattern is available or until the timeout occurs. Because we may not know the
   exact name of the target artifacts, the method accepts a filename pattern,
   to identify whether an artifact whose name matches the pattern exists (e.g.
-  use pattern '_full_' to search for the full payload
+  use pattern '*_full_*' to search for the full payload
   'chromeos_R17-1413.0.0-a1_x86-mario_full_dev.bin'). Returns the name only if
   found before the timeout.
 
   Args:
-    pattern: Regular expression pattern to identify the target artifact.
+    pattern: a path pattern (glob or regex) identifying the files we need.
     archive_url: URL of the Google Storage bucket.
     err_str: String to display in the error message on error.
-    single_item: Only a single item should be returned. If more than one item
-                 matches the pattern errors out unless pattern matches one
-                 exactly.
-    timeout/delay: optional and self-explanatory.
-
+    timeout: how long are we allowed to keep trying.
+    delay: how long to wait between attempts.
+    is_regex_pattern: Whether the pattern is a regex (default: glob).
   Returns:
     The list of artifacts matching the pattern in Google Storage bucket or None
-      if not found.
+    if not found.
 
-  Raises:
-    PatternNotSpecific: If caller sets single_item but multiple items match.
   """
+  # Define the different methods used for obtaining the list of files on the
+  # archive directory, in the order in which they are attempted.
+  get_methods = []
+  # The default method is to check the manifest file in the archive directory.
+  get_methods.append(('gsutil cat %s/%s' % (archive_url, UPLOADED_LIST),
+                      'Failed to get a list of uploaded files.'))
+  # For backward compatibility, we fall back to using "gsutil ls" when the
+  # manifest file is not present.
+  get_methods.append(('gsutil ls %s/*' % archive_url,
+                      'Failed to list archive directory contents.'))
+
   deadline = time.time() + timeout
   while True:
     uploaded_list = []
-    try:
-      cmd = 'gsutil cat %s/%s' % (archive_url, UPLOADED_LIST)
-      msg = 'Failed to get a list of uploaded files.'
-      uploaded_list = GSUtilRun(cmd, msg).splitlines()
-    except GSUtilError:
-      # For backward compatibility, falling back to use "gsutil ls"
-      # when the manifest file is not present.
-      cmd = 'gsutil ls %s/*' % archive_url
-      msg = 'Failed to list payloads.'
-      returned_list = GSUtilRun(cmd, msg).splitlines()
-      for item in returned_list:
-        try:
-          uploaded_list.append(item.rsplit('/', 1)[1])
-        except IndexError:
-          pass
+    for cmd, msg in get_methods:
+      try:
+        result = GSUtilRun(cmd, msg)
+      except GSUtilError:
+        continue  # It didn't work, try the next method.
 
-    # Check if all target artifacts are available.
-    found_names = _GetGSNamesFromList(uploaded_list, pattern)
-    if found_names:
-      if single_item and len(found_names) > 1:
-        found_names_exact = _GetGSNamesFromList(uploaded_list, '^%s$' % pattern)
-        if not found_names_exact:
-          raise PatternNotSpecific(
-            'Too many items %s returned by pattern %s in %s' % (
-                str(found_names), pattern, archive_url))
-        else:
-          logging.info('More than one item returned but one file matched'
-                       ' exactly so returning that: %s.', found_names_exact)
-          found_names = found_names_exact
+      # Make sure we're dealing with artifact base names only.
+      uploaded_list = [os.path.basename(p) for p in result.splitlines()]
+      break
 
-      return found_names
+    # Only keep files matching the target artifact name/pattern.
+    if is_regex_pattern:
+      filter_re = re.compile(pattern)
+      matching_names = [f for f in uploaded_list
+                        if filter_re.search(f) is not None]
+    else:
+      matching_names = fnmatch.filter(uploaded_list, pattern)
+
+    if matching_names:
+      return matching_names
 
     # Don't delay past deadline.
     to_delay = random.uniform(1.5 * delay, 2.5 * delay)
@@ -168,17 +165,21 @@
 
   This lists out the contents of the given GS bucket or regex to GS buckets,
   and tries to grab the newest version found in the directory names.
+
+  Args:
+    gsutil_dir: directory location on GS to check.
+    with_release: whether versions include a release milestone (e.g. R12).
+  Returns:
+    The most recent version number found.
+
   """
   cmd = 'gsutil ls %s' % gsutil_dir
   msg = 'Failed to find most recent builds at %s' % gsutil_dir
   dir_names = [p.split('/')[-2] for p in GSUtilRun(cmd, msg).splitlines()]
   try:
-    if with_release:
-      versions = filter(lambda x: re.match(devserver_constants.VERSION_RE, x),
-                        dir_names)
-    else:
-      versions = filter(lambda x: re.match(devserver_constants.VERSION, x),
-                        dir_names)
+    filter_re = re.compile(devserver_constants.VERSION_RE if with_release
+                           else devserver_constants.VERSION)
+    versions = filter(filter_re.match, dir_names)
     latest_version = max(versions, key=distutils.version.LooseVersion)
   except ValueError:
     raise GSUtilError(msg)