Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Module containing gsutil helper methods.""" |
| 6 | |
joychen | f8f07e2 | 2013-07-12 17:45:51 -0700 | [diff] [blame] | 7 | import distutils.version |
Gilad Arnold | 950569b | 2013-08-27 14:38:01 -0700 | [diff] [blame] | 8 | import fnmatch |
| 9 | import os |
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 10 | import random |
| 11 | import re |
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 12 | import subprocess |
Chris Sosa | 101fd86 | 2012-06-12 17:44:53 -0700 | [diff] [blame] | 13 | import time |
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 14 | |
joychen | f8f07e2 | 2013-07-12 17:45:51 -0700 | [diff] [blame] | 15 | import devserver_constants |
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 16 | import log_util |
| 17 | |
Gilad Arnold | abb352e | 2012-09-23 01:24:27 -0700 | [diff] [blame] | 18 | |
# Maximum number of times GSUtilRun invokes a gsutil command before giving up.
GSUTIL_ATTEMPTS = 1
# Name of the manifest object in an archive directory that lists the files
# uploaded to it.
UPLOADED_LIST = 'UPLOADED'
| 21 | |
| 22 | |
def _Log(message, *args):
  """Module-local log function; tags every message with 'GSUTIL_UTIL'.

  Args:
    message: the message (or format string) to log.
    *args: extra positional arguments forwarded to log_util.LogWithTag.
  Returns:
    Whatever log_util.LogWithTag returns.
  """
  tag = 'GSUTIL_UTIL'
  return log_util.LogWithTag(tag, message, *args)
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 26 | |
| 27 | |
class GSUtilError(Exception):
  """Raised when running a gsutil command fails."""
| 31 | |
| 32 | |
class PatternNotSpecific(Exception):
  """Raised when a pattern unexpectedly matches more than one item."""
| 36 | |
| 37 | |
def GSUtilRun(cmd, err_msg):
  """Runs a GSUTIL command up to GSUTIL_ATTEMPTS number of times.

  Failed attempts are retried with exponential backoff (1s, 2s, 4s, ...). No
  retry is made when the command output says that the object does not exist,
  and no time is spent sleeping once the last attempt has failed.

  Args:
    cmd: a string containing the gsutil command to run. It is executed through
      the shell, so it must not be built from untrusted input.
    err_msg: string prepended to the exception thrown in case of a failure.
  Returns:
    stdout of the called gsutil command.
  Raises:
    GSUtilError: if all attempts to run gsutil have failed, if the shell
      reports that the command was not found (exit code 127), or if
      GSUTIL_ATTEMPTS does not allow a single attempt.
  """
  # Output fragments which mean that the requested object is simply missing.
  not_exist_messages = ('matched no objects', 'non-existent object',
                        'no urls matched')
  proc = None
  stderr = None
  sleep_timeout = 1
  for attempt in range(GSUTIL_ATTEMPTS):
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode == 0:
      return stdout

    if (stderr and any(x in stderr.lower() for x in not_exist_messages) or
        stdout and any(x in stdout.lower() for x in not_exist_messages)):
      # If the object does not exist, exit now instead of wasting time
      # on retrying. Note that `gsutil stat` prints error message to
      # stdout instead (b/16020252), so we check both stdout and
      # stderr.
      break

    if proc.returncode == 127:
      raise GSUtilError('gsutil tool not found in your path.')

    # Back off only if another attempt follows; sleeping after the final
    # attempt would merely delay the error report.
    if attempt + 1 < GSUTIL_ATTEMPTS:
      time.sleep(sleep_timeout)
      sleep_timeout *= 2

  if proc is None:
    # The loop body never ran, so there is no return code to report.
    raise GSUtilError('%s GSUTIL cmd %s was not run (GSUTIL_ATTEMPTS=%d).' % (
        err_msg, cmd, GSUTIL_ATTEMPTS))

  raise GSUtilError('%s GSUTIL cmd %s failed with return code %d:\n\n%s' % (
      err_msg, cmd, proc.returncode, stderr))
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 79 | |
| 80 | |
def DownloadFromGS(src, dst):
  """Copies the object at gs_url |src| to |dst| with `gsutil cp`.

  Args:
    src: source file on GS that should be fetched.
    dst: file that the source file is copied to.
  Raises:
    GSUtilError: if the gsutil copy command fails.
  """
  failure_msg = 'Failed to download "%s".' % src
  copy_cmd = 'gsutil cp %s %s' % (src, dst)
  GSUtilRun(copy_cmd, failure_msg)
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 93 | |
| 94 | |
Gilad Arnold | 950569b | 2013-08-27 14:38:01 -0700 | [diff] [blame] | 95 | def _GlobHasWildcards(pattern): |
| 96 | """Returns True if a glob pattern contains any wildcards.""" |
Gilad Arnold | c703c8c | 2013-08-29 10:41:27 -0700 | [diff] [blame] | 97 | return len(pattern) > len(pattern.translate(None, '*?[]')) |
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 98 | |
| 99 | |
def GetGSNamesWithWait(pattern, archive_url, err_str, timeout=600, delay=10,
                       is_regex_pattern=False):
  """Returns the google storage names specified by the given pattern.

  This method polls Google Storage until the target artifacts specified by the
  pattern are available or until the timeout occurs. Because we may not know
  the exact name of the target artifacts, the method accepts a filename
  pattern, to identify whether an artifact whose name matches the pattern
  exists (e.g. use pattern '*_full_*' to search for the full payload
  'chromeos_R17-1413.0.0-a1_x86-mario_full_dev.bin'). Returns the names only
  if found before the timeout.

  Args:
    pattern: a path pattern (glob or regex) identifying the files we need. It
      is matched against artifact base names.
    archive_url: URL of the Google Storage bucket.
    err_str: String appended to the log message emitted before each retry.
    timeout: how long (in seconds) we are allowed to keep trying.
    delay: base wait between attempts, in seconds; the actual wait is drawn at
      random from [1.5 * delay, 2.5 * delay].
    is_regex_pattern: Whether the pattern is a regex (otherwise a glob).
  Returns:
    The list of artifacts matching the pattern in Google Storage bucket or None
    if not found.
  """
  # Define the different methods used for obtaining the list of files on the
  # archive directory, in the order in which they are attempted. Each method is
  # defined by a tuple consisting of (i) the gsutil command-line to be
  # executed; (ii) the error message to use in case of a failure (returned in
  # the corresponding exception); (iii) the desired return value to use in case
  # of success, or None if the actual command output should be used.
  get_methods = []
  # If the pattern is a glob and contains no wildcards, we'll first attempt to
  # stat the file directly.
  if not (is_regex_pattern or _GlobHasWildcards(pattern)):
    get_methods.append(('gsutil stat %s/%s' % (archive_url, pattern),
                        'Failed to stat on the artifact file.', pattern))

  # The default method is to check the manifest file in the archive directory.
  get_methods.append(('gsutil cat %s/%s' % (archive_url, UPLOADED_LIST),
                      'Failed to get a list of uploaded files.',
                      None))
  # For backward compatibility, we fall back to using "gsutil ls" when the
  # manifest file is not present.
  get_methods.append(('gsutil ls %s/*' % archive_url,
                      'Failed to list archive directory contents.',
                      None))

  # The pattern never changes while polling, so compile a regex only once.
  filter_re = re.compile(pattern) if is_regex_pattern else None

  deadline = time.time() + timeout
  while True:
    uploaded_list = []
    for cmd, msg, override_result in get_methods:
      try:
        result = GSUtilRun(cmd, msg)
      except GSUtilError:
        continue  # It didn't work, try the next method.

      if override_result:
        result = override_result

      # Make sure we're dealing with artifact base names only.
      uploaded_list = [os.path.basename(p) for p in result.splitlines()]
      break

    # Only keep files matching the target artifact name/pattern.
    if filter_re is not None:
      matching_names = [f for f in uploaded_list
                        if filter_re.search(f) is not None]
    else:
      matching_names = fnmatch.filter(uploaded_list, pattern)

    if matching_names:
      return matching_names

    # Don't delay past deadline.
    to_delay = random.uniform(1.5 * delay, 2.5 * delay)
    if to_delay < (deadline - time.time()):
      _Log('Retrying in %f seconds...%s', to_delay, err_str)
      time.sleep(to_delay)
    else:
      return None
joychen | f8f07e2 | 2013-07-12 17:45:51 -0700 | [diff] [blame] | 181 | |
| 182 | |
def GetLatestVersionFromGSDir(gsutil_dir, with_release=True):
  """Returns most recent version number found in a GS directory.

  This lists out the contents of the given GS location with `gsutil ls` and
  tries to grab the newest version found in the directory names. Output lines
  that do not contain a directory component (e.g. blank lines) are ignored.

  Args:
    gsutil_dir: directory location on GS to check.
    with_release: whether versions include a release milestone (e.g. R12).
  Returns:
    The most recent version number found.
  Raises:
    GSUtilError: if listing the directory fails or if none of the listed
      directory names looks like a version.
  """
  cmd = 'gsutil ls %s' % gsutil_dir
  msg = 'Failed to find most recent builds at %s' % gsutil_dir

  # A listed directory ends with '/', so its name is the second to last
  # component. Skip lines too short to have one instead of raising IndexError.
  dir_names = []
  for path in GSUtilRun(cmd, msg).splitlines():
    components = path.split('/')
    if len(components) >= 2:
      dir_names.append(components[-2])

  filter_re = re.compile(devserver_constants.VERSION_RE if with_release
                         else devserver_constants.VERSION)
  versions = [name for name in dir_names if filter_re.match(name)]
  try:
    # max() raises ValueError when no directory name matched.
    return max(versions, key=distutils.version.LooseVersion)
  except ValueError:
    raise GSUtilError(msg)