Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Module containing gsutil helper methods.""" |
| 6 | |
joychen | f8f07e2 | 2013-07-12 17:45:51 -0700 | [diff] [blame] | 7 | import distutils.version |
Gilad Arnold | 950569b | 2013-08-27 14:38:01 -0700 | [diff] [blame] | 8 | import fnmatch |
| 9 | import os |
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 10 | import random |
| 11 | import re |
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 12 | import subprocess |
Chris Sosa | 101fd86 | 2012-06-12 17:44:53 -0700 | [diff] [blame] | 13 | import time |
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 14 | |
joychen | f8f07e2 | 2013-07-12 17:45:51 -0700 | [diff] [blame] | 15 | import devserver_constants |
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 16 | import log_util |
| 17 | |
Gilad Arnold | abb352e | 2012-09-23 01:24:27 -0700 | [diff] [blame] | 18 | |
# Maximum number of times GSUtilRun attempts a gsutil command before giving up.
GSUTIL_ATTEMPTS = 5
# Name of the manifest file in an archive directory that is read (via
# 'gsutil cat') to obtain the list of uploaded files.
UPLOADED_LIST = 'UPLOADED'
| 21 | |
| 22 | |
# Logging helper private to this module.
def _Log(message, *args):
  """Forwards |message| and |args| to log_util.LogWithTag.

  Every entry is tagged with 'GSUTIL_UTIL'.

  Args:
    message: the message passed through to the logger.
    *args: extra positional arguments passed through unchanged.
  Returns:
    Whatever log_util.LogWithTag returns.
  """
  tag = 'GSUTIL_UTIL'
  return log_util.LogWithTag(tag, message, *args)
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 26 | |
| 27 | |
class GSUtilError(Exception):
  """Signals that running a gsutil command resulted in an error."""
| 31 | |
| 32 | |
class PatternNotSpecific(Exception):
  """Signals that a pattern unexpectedly matched more than one item."""
| 36 | |
| 37 | |
def GSUtilRun(cmd, err_msg):
  """Runs a gsutil command up to GSUTIL_ATTEMPTS number of times.

  Failed attempts are retried with exponential backoff: the wait starts at one
  second and doubles after every failure. No wait is performed after the final
  attempt. Retrying stops early when gsutil's stderr indicates that the
  requested object does not exist.

  Args:
    cmd: a string containing the gsutil command to run; it is executed
      through the shell.
    err_msg: string prepended to the exception thrown in case of a failure.
  Returns:
    stdout of the called gsutil command.
  Raises:
    GSUtilError: if all attempts to run gsutil have failed, or if the command
      exits with status 127 (reported as gsutil not being found in the path).
  """
  proc = None
  stderr = None
  sleep_timeout = 1
  for attempt in range(GSUTIL_ATTEMPTS):
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode == 0:
      return stdout

    # Where bytes and str are distinct types, the substring checks below (and
    # the final error message) need text rather than raw bytes.
    if isinstance(stderr, bytes) and not isinstance(stderr, str):
      stderr = stderr.decode('utf-8', 'replace')

    if stderr and ('matched no objects' in stderr or
                   'non-existent object' in stderr):
      # TODO(sosa): Note this is a heuristic that makes us not re-attempt
      # unnecessarily. However, if it fails, the worst that can happen is just
      # waiting longer than necessary.
      break

    if proc.returncode == 127:
      raise GSUtilError('gsutil tool not found in your path.')

    # Back off before the next attempt; sleeping after the last attempt would
    # only delay reporting the failure.
    if attempt < GSUTIL_ATTEMPTS - 1:
      time.sleep(sleep_timeout)
      sleep_timeout *= 2

  raise GSUtilError('%s GSUTIL cmd %s failed with return code %d:\n\n%s' % (
      err_msg, cmd, proc.returncode, stderr))
Chris Sosa | 47a7d4e | 2012-03-28 11:26:55 -0700 | [diff] [blame] | 74 | |
| 75 | |
def DownloadFromGS(src, dst):
  """Copies the Google Storage object |src| to |dst| using 'gsutil cp'.

  Args:
    src: source file on GS that needs to be downloaded.
    dst: file to copy the source file to.
  Raises:
    GSUtilError: if an error occurs during the download.
  """
  copy_cmd = 'gsutil cp %s %s' % (src, dst)
  GSUtilRun(copy_cmd, 'Failed to download "%s".' % src)
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 88 | |
| 89 | |
Gilad Arnold | 950569b | 2013-08-27 14:38:01 -0700 | [diff] [blame] | 90 | def _GlobHasWildcards(pattern): |
| 91 | """Returns True if a glob pattern contains any wildcards.""" |
Gilad Arnold | c703c8c | 2013-08-29 10:41:27 -0700 | [diff] [blame] | 92 | return len(pattern) > len(pattern.translate(None, '*?[]')) |
Chris Sosa | 76e44b9 | 2013-01-31 12:11:38 -0800 | [diff] [blame] | 93 | |
| 94 | |
def GetGSNamesWithWait(pattern, archive_url, err_str, timeout=600, delay=10,
                       is_regex_pattern=False):
  """Returns the google storage names specified by the given pattern.

  This method polls Google Storage until the target artifacts specified by the
  pattern are available or until the timeout occurs. Because we may not know
  the exact name of the target artifacts, the method accepts a filename
  pattern, to identify whether an artifact whose name matches the pattern
  exists (e.g. use pattern '*_full_*' to search for the full payload
  'chromeos_R17-1413.0.0-a1_x86-mario_full_dev.bin'). Returns the names only
  if found before the timeout.

  Args:
    pattern: a path pattern (glob or regex) identifying the files we need.
    archive_url: URL of the Google Storage bucket.
    err_str: String to display in the error message on error.
    timeout: how long are we allowed to keep trying.
    delay: how long to wait between attempts; the actual wait is randomized
      between 1.5 and 2.5 times this value.
    is_regex_pattern: Whether the pattern is a regex (otherwise a glob).
  Returns:
    The list of artifacts matching the pattern in Google Storage bucket or None
    if not found.
  """
  # Define the different methods used for obtaining the list of files on the
  # archive directory, in the order in which they are attempted. Each method is
  # defined by a tuple consisting of (i) the gsutil command-line to be
  # executed; (ii) the error message to use in case of a failure (returned in
  # the corresponding exception); (iii) the desired return value to use in case
  # of success, or None if the actual command output should be used.
  get_methods = []
  # If the pattern is a glob and contains no wildcards, we'll first attempt to
  # stat the file via du.
  if not (is_regex_pattern or _GlobHasWildcards(pattern)):
    get_methods.append(('gsutil du %s/%s' % (archive_url, pattern),
                        'Failed to du on the artifact file.',
                        pattern))

  # The default method is to check the manifest file in the archive directory.
  get_methods.append(('gsutil cat %s/%s' % (archive_url, UPLOADED_LIST),
                      'Failed to get a list of uploaded files.',
                      None))
  # For backward compatibility, we fall back to using "gsutil ls" when the
  # manifest file is not present.
  get_methods.append(('gsutil ls %s/*' % archive_url,
                      'Failed to list archive directory contents.',
                      None))

  # The pattern never changes while polling, so compile a regex only once
  # instead of on every iteration.
  filter_re = re.compile(pattern) if is_regex_pattern else None

  deadline = time.time() + timeout
  while True:
    uploaded_list = []
    for cmd, msg, override_result in get_methods:
      try:
        result = GSUtilRun(cmd, msg)
      except GSUtilError:
        continue  # It didn't work, try the next method.

      if override_result:
        result = override_result

      # Make sure we're dealing with artifact base names only.
      uploaded_list = [os.path.basename(p) for p in result.splitlines()]
      break

    # Only keep files matching the target artifact name/pattern.
    if is_regex_pattern:
      matching_names = [f for f in uploaded_list
                        if filter_re.search(f) is not None]
    else:
      matching_names = fnmatch.filter(uploaded_list, pattern)

    if matching_names:
      return matching_names

    # Don't delay past deadline.
    to_delay = random.uniform(1.5 * delay, 2.5 * delay)
    if to_delay < (deadline - time.time()):
      _Log('Retrying in %f seconds...%s', to_delay, err_str)
      time.sleep(to_delay)
    else:
      return None
joychen | f8f07e2 | 2013-07-12 17:45:51 -0700 | [diff] [blame] | 177 | |
| 178 | |
def GetLatestVersionFromGSDir(gsutil_dir, with_release=True):
  """Returns most recent version number found in a GS directory.

  This lists out the contents of the given GS bucket or regex to GS buckets,
  and tries to grab the newest version found in the directory names. The
  directory name of each listed path is taken to be its second-to-last
  '/'-separated component; listing lines that have no such component (e.g.
  blank lines) are ignored.

  Args:
    gsutil_dir: directory location on GS to check.
    with_release: whether versions include a release milestone (e.g. R12).
      Selects devserver_constants.VERSION_RE when True and
      devserver_constants.VERSION otherwise.
  Returns:
    The most recent version number found.
  Raises:
    GSUtilError: if the listing fails or no directory name matches the version
      pattern.
  """
  cmd = 'gsutil ls %s' % gsutil_dir
  msg = 'Failed to find most recent builds at %s' % gsutil_dir

  dir_names = []
  for path in GSUtilRun(cmd, msg).splitlines():
    components = path.split('/')
    # A line without any '/' has no second-to-last component; skip it rather
    # than failing with an IndexError.
    if len(components) >= 2:
      dir_names.append(components[-2])

  filter_re = re.compile(devserver_constants.VERSION_RE if with_release
                         else devserver_constants.VERSION)
  versions = [name for name in dir_names if filter_re.match(name)]
  try:
    # max() raises ValueError when no directory name matched.
    return max(versions, key=distutils.version.LooseVersion)
  except ValueError:
    raise GSUtilError(msg)