Chris Sosa | 5e4246b | 2012-05-22 18:05:22 -0700 | [diff] [blame] | 1 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
Sean O'Connor | 5346e4e | 2010-08-12 18:49:24 +0200 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
Gregory Nisbet | cf8c2ed | 2020-07-14 18:35:49 -0700 | [diff] [blame] | 5 | from __future__ import print_function |
| 6 | |
Sean O'Connor | 5346e4e | 2010-08-12 18:49:24 +0200 | [diff] [blame] | 7 | import logging |
Amin Hassani | 5cda21d | 2020-08-10 15:24:44 -0700 | [diff] [blame] | 8 | import os |
Sean O'Connor | 5346e4e | 2010-08-12 18:49:24 +0200 | [diff] [blame] | 9 | import re |
Gregory Nisbet | cf8c2ed | 2020-07-14 18:35:49 -0700 | [diff] [blame] | 10 | import six |
Congbin Guo | 63ae030 | 2019-08-12 16:37:49 -0700 | [diff] [blame] | 11 | import sys |
Richard Barnette | 0beb14b | 2018-05-15 18:07:52 +0000 | [diff] [blame] | 12 | import urlparse |
Sean O'Connor | 5346e4e | 2010-08-12 18:49:24 +0200 | [diff] [blame] | 13 | |
Chris Sosa | 6542508 | 2013-10-16 13:26:22 -0700 | [diff] [blame] | 14 | from autotest_lib.client.bin import utils |
Amin Hassani | 18e3988 | 2020-08-10 15:32:10 -0700 | [diff] [blame] | 15 | from autotest_lib.client.common_lib import error |
Prashanth B | 32baa9b | 2014-03-13 13:23:01 -0700 | [diff] [blame] | 16 | from autotest_lib.client.common_lib.cros import dev_server |
David Haddock | 77b75c3 | 2020-05-14 01:56:32 -0700 | [diff] [blame] | 17 | from autotest_lib.client.common_lib.cros import kernel_utils |
Richard Barnette | 0beb14b | 2018-05-15 18:07:52 +0000 | [diff] [blame] | 18 | from autotest_lib.server import autotest |
Shelley Chen | 61d2898 | 2016-10-28 09:40:20 -0700 | [diff] [blame] | 19 | from autotest_lib.server import utils as server_utils |
Richard Barnette | 0beb14b | 2018-05-15 18:07:52 +0000 | [diff] [blame] | 20 | from autotest_lib.server.cros.dynamic_suite import constants as ds_constants |
| 21 | from autotest_lib.server.cros.dynamic_suite import tools |
Dan Shi | f3a35f7 | 2016-01-25 11:18:14 -0800 | [diff] [blame] | 22 | |
Shelley Chen | 16b8df3 | 2016-10-27 16:24:21 -0700 | [diff] [blame] | 23 | try: |
| 24 | from chromite.lib import metrics |
Dan Shi | 5e2efb7 | 2017-02-07 11:40:23 -0800 | [diff] [blame] | 25 | except ImportError: |
| 26 | metrics = utils.metrics_mock |
Sean O'Connor | 5346e4e | 2010-08-12 18:49:24 +0200 | [diff] [blame] | 27 | |
Gwendal Grignou | 3e96cc2 | 2017-06-07 16:22:51 -0700 | [diff] [blame] | 28 | |
Richard Barnette | 621a8e4 | 2018-06-25 17:34:11 -0700 | [diff] [blame] | 29 | def _metric_name(base_name): |
| 30 | return 'chromeos/autotest/provision/' + base_name |
| 31 | |
| 32 | |
# _QUICK_PROVISION_SCRIPT - Name of the provisioning script run on the
# DUT. It is looked up under /usr/local/bin on the DUT, or fetched from
# the devserver's /static directory (see
# `ChromiumOSUpdater._get_remote_script()`).
_QUICK_PROVISION_SCRIPT = 'quick-provision'

# PROVISION_FAILED - A flag file to indicate provision failures. The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater._prepare_host()`). The file's location in
# stateful means that on successful update it will be removed. Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
PROVISION_FAILED = '/var/tmp/provision_failed'


# A flag file used to enable special handling in lab DUTs. Some
# parts of the system in Chromium OS test images will behave in ways
# convenient to the test lab when this file is present. Generally,
# we create this immediately after any update completes.
_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'


# _TARGET_VERSION - A file containing the new version to which we plan
# to update. This file is used by the CrOS shutdown code to detect and
# handle certain version downgrade cases. Specifically: Downgrading
# may trigger an unwanted powerwash in the target build when the
# following conditions are met:
#   * Source build is a v4.4 kernel with R69-10756.0.0 or later.
#   * Target build predates the R69-10756.0.0 cutoff.
# When this file is present and indicates a downgrade, the OS shutdown
# code on the DUT knows how to prevent the powerwash.
_TARGET_VERSION = '/run/update_target_version'


# _REBOOT_FAILURE_MESSAGE - This is the standard message text returned
# when the Host.reboot() method fails. The source of this text comes
# from `wait_for_restart()` in client/common_lib/hosts/base_classes.py.

_REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'


# DEVSERVER_PORT, GS_CACHE_PORT - Port numbers, kept as strings. They
# are used to derive the GsCache server address from a devserver
# address: `ChromiumOSUpdater._quick_provision_with_gs_cache()` replaces
# DEVSERVER_PORT with GS_CACHE_PORT in the devserver name.
DEVSERVER_PORT = '8082'
GS_CACHE_PORT = '8888'
| 72 | |
| 73 | |
class _AttributedUpdateError(error.TestFail):
    """Update failure with an attributed cause.

    Concrete subclasses are expected to define two class attributes
    that are read here:
      * `_SUMMARY` - the base text reported by `failure_summary`.
      * `_CLASSIFIERS` - a sequence of `(pattern, classification)`
        pairs; each pattern is matched with `re.match()` against the
        failure message.
    """

    def __init__(self, attribution, msg):
        """Build the error from an attribution prefix and a message.

        @param attribution: Text describing where the blame lies; used
                            as the prefix of the exception text.
        @param msg: The underlying failure message.
        """
        full_text = '%s: %s' % (attribution, msg)
        super(_AttributedUpdateError, self).__init__(full_text)
        # Keep the bare message; classification matches against it
        # rather than against the prefixed exception text.
        self._message = msg

    def _classify(self):
        """Return the first classification whose pattern matches.

        @return The classification paired with the first pattern in
                `_CLASSIFIERS` that matches the start of the message,
                or `None` when no pattern matches.
        """
        matching = (label for pattern, label in self._CLASSIFIERS
                    if re.match(pattern, self._message))
        return next(matching, None)

    @property
    def failure_summary(self):
        """Summarize this error for metrics reporting."""
        label = self._classify()
        if not label:
            return self._SUMMARY
        return '%s: %s' % (self._SUMMARY, label)
Richard Barnette | 9d43e56 | 2018-06-05 17:20:10 +0000 | [diff] [blame] | 96 | |
| 97 | |
class HostUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the DUT.

    This class of exception should be raised when the most likely cause
    of failure was a condition existing on the DUT prior to the update,
    such as a hardware problem, or a bug in the software on the DUT.
    """

    # Message used when the DUT does not respond at all; it doubles as
    # its own classification label.
    DUT_DOWN = 'No answer to ssh'

    _SUMMARY = 'DUT failed prior to update'
    _CLASSIFIERS = [
        (DUT_DOWN, DUT_DOWN),
        (_REBOOT_FAILURE_MESSAGE, 'Reboot failed'),
    ]

    def __init__(self, hostname, msg):
        """Build the error for a failure on `hostname`.

        @param hostname: Name of the DUT that failed.
        @param msg: The underlying failure message.
        """
        attribution = 'Error on %s prior to update' % hostname
        super(HostUpdateError, self).__init__(attribution, msg)
| 117 | |
| 118 | |
class ImageInstallError(_AttributedUpdateError):
    """Failure updating a DUT when installing from the devserver.

    This class of exception should be raised when the target DUT fails
    to download and install the target image from the devserver, and
    either the devserver or the DUT might be at fault.
    """

    _SUMMARY = 'Image failed to download and install'
    # No finer-grained classifications are defined for install failures.
    _CLASSIFIERS = []

    def __init__(self, hostname, devserver, msg):
        """Build the error for a failed install onto `hostname`.

        @param hostname: Name of the DUT being updated.
        @param devserver: Name of the devserver the image came from.
        @param msg: The underlying failure message.
        """
        attribution = ('Download and install failed from %s onto %s'
                       % (devserver, hostname))
        super(ImageInstallError, self).__init__(attribution, msg)
| 134 | |
| 135 | |
class NewBuildUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the target build.

    This class of exception should be raised when updating to a new
    build fails, and the most likely cause of the failure is a bug in
    the newly installed target build.
    """

    CHROME_FAILURE = 'Chrome failed to reach login screen'
    ROLLBACK_FAILURE = 'System rolled back to previous build'

    _SUMMARY = 'New build failed'
    _CLASSIFIERS = [
        (CHROME_FAILURE, 'Chrome did not start'),
        (ROLLBACK_FAILURE, ROLLBACK_FAILURE),
    ]

    def __init__(self, update_version, msg):
        """Build the error for a failure in build `update_version`.

        @param update_version: Version of the build that was installed.
        @param msg: The underlying failure message.
        """
        attribution = 'Failure in build %s' % update_version
        super(NewBuildUpdateError, self).__init__(attribution, msg)

    @property
    def failure_summary(self):
        """Return the fixed metrics summary for new-build failures."""
        # NOTE(review): this override returns a constant, so the
        # `_SUMMARY` and `_CLASSIFIERS` values above are never
        # consulted for this class - confirm whether that is intended.
        return 'Build failed to work after installing'
| 161 | |
Richard Barnette | 9d43e56 | 2018-06-05 17:20:10 +0000 | [diff] [blame] | 162 | |
Richard Barnette | 3e8b228 | 2018-05-15 20:42:20 +0000 | [diff] [blame] | 163 | def _url_to_version(update_url): |
Dan Shi | 0f466e8 | 2013-02-22 15:44:58 -0800 | [diff] [blame] | 164 | """Return the version based on update_url. |
| 165 | |
| 166 | @param update_url: url to the image to update to. |
| 167 | |
| 168 | """ |
Dale Curtis | ddfdb94 | 2011-07-14 13:59:24 -0700 | [diff] [blame] | 169 | # The Chrome OS version is generally the last element in the URL. The only |
| 170 | # exception is delta update URLs, which are rooted under the version; e.g., |
| 171 | # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to |
| 172 | # strip off the au section of the path before reading the version. |
Dan Shi | 5002cfc | 2013-04-29 10:45:05 -0700 | [diff] [blame] | 173 | return re.sub('/au/.*', '', |
| 174 | urlparse.urlparse(update_url).path).split('/')[-1].strip() |
Sean O'Connor | 5346e4e | 2010-08-12 18:49:24 +0200 | [diff] [blame] | 175 | |
| 176 | |
def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    # The leading characters are dropped by length only; the path is
    # not checked to actually begin with '/update/'.
    prefix_length = len('/update/')
    url_path = urlparse.urlparse(update_url).path
    return url_path[prefix_length:]
Scott Zawalski | eadbf70 | 2013-03-14 09:23:06 -0400 | [diff] [blame] | 189 | |
| 190 | |
def get_update_failure_reason(exception):
    """Convert an exception into a failure reason for metrics.

    The passed in `exception` should be one raised by failure of
    `ChromiumOSUpdater.run_update`. The returned string will describe
    the failure. If the input exception value is not a truish value
    the return value will be `None`.

    The number of possible return strings is restricted to a limited
    enumeration of values so that the string may be safely used in
    Monarch metrics without worrying about cardinality of the range of
    string values.

    @param exception Exception to be converted to a failure reason.

    @return A string suitable for use in Monarch metrics, or `None`.
    """
    if not exception:
        return None
    if isinstance(exception, _AttributedUpdateError):
        return exception.failure_summary
    # Anything else is reported by exception class name only, which
    # keeps the set of possible strings small.
    return 'Unknown Error: %s' % type(exception).__name__
| 214 | |
| 215 | |
def _get_metric_fields(update_url):
    """Return a dict of metric fields.

    This is used for sending autoupdate metrics for the given update URL.

    @param update_url Metrics fields will be calculated from this URL.

    @return A dict with the keys 'dev_server', 'board', 'build_type'
            and 'milestone'. The last three are empty strings when the
            build name cannot be parsed.
    """
    build_name = url_to_image_name(update_url)
    try:
        parsed_name = server_utils.ParseBuildName(build_name)
    except server_utils.ParseBuildNameException:
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', build_name)
        parsed_name = ('', '', '', None)
    # The fourth element of the parsed name is not reported.
    board, build_type, milestone, _ = parsed_name
    resolved_server = dev_server.get_resolved_hostname(update_url)
    return {
        'dev_server': resolved_server,
        'board': board,
        'build_type': build_type,
        'milestone': milestone,
    }
| 237 | |
| 238 | |
class ChromiumOSUpdater(object):
    """Chromium OS specific DUT update functionality.

    The public entry point is `run_update()`, which prepares the host,
    installs the image with the `quick-provision` script, and (except
    for servohosts) reboots into and validates the new build.
    """

    def __init__(self, update_url, host=None, interactive=True,
                 is_release_bucket=None, is_servohost=False):
        """Initializes the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param interactive: Bool whether we are doing an interactive update.
        @param is_release_bucket: If True, use release bucket
            gs://chromeos-releases.
        @param is_servohost: Bool whether the update target is a servohost.
        """
        self.update_url = update_url
        self.host = host
        self.interactive = interactive
        self.update_version = _url_to_version(update_url)
        self._is_release_bucket = is_release_bucket
        self._is_servohost = is_servohost


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def _rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def _reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state."""
        self._run('stop ui || true')
        self._run('restart update-engine')


    def _reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        # NOTE(review): the command is handed to host.run() as a list,
        # including the trailing '2>&1' token; confirm that the host
        # implementation turns the list into a shell command line.
        cmd = ['rm', '-rf']
        for f in ('var_new', 'dev_image_new', '.update_available'):
            cmd += [os.path.join('/mnt/stateful_partition', f)]
        # TODO(b/165024723): This is a temporary measure until we figure out the
        # root cause of this bug.
        cmd += ['/mnt/stateful_partition/dev_image/share/tast/data/chromiumos/'
                'tast/local/bundles/']
        cmd += [_TARGET_VERSION, '2>&1']
        self._run(cmd)


    def _set_target_version(self):
        """Set the "target version" for the update."""
        # Version strings that come from release buckets do not have RXX- at the
        # beginning. So remove this prefix only if the version has it.
        version_number = (self.update_version.split('-')[1]
                          if '-' in self.update_version
                          else self.update_version)
        self._run('echo %s > %s' % (version_number, _TARGET_VERSION))


    def _revert_boot_partition(self):
        """Revert the boot partition.

        @return The result of running `/postinst` on the partition
                reported by `rootdev -s`.
        """
        part = self._rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def _get_remote_script(self, script_name):
        """Ensure that `script_name` is present on the DUT.

        The given script (e.g. `quick-provision`) may be present in the
        stateful partition under /usr/local/bin, or we may have to
        download it from the devserver.

        Determine whether the script is present or must be downloaded
        and download if necessary. Then, return a command fragment
        sufficient to run the script from wherever it now lives on the
        DUT.

        @param script_name  The name of the script as expected in
                            /usr/local/bin and on the devserver.
        @return A string with the command (minus arguments) that will
                run the target script, or `None` if the downloaded file
                does not start with a usable `#!` interpreter line.
        """
        remote_script = '/usr/local/bin/%s' % script_name
        if self.host.path_exists(remote_script):
            return remote_script
        self._run('mkdir -p -m 1777 /usr/local/tmp')
        remote_tmp_script = '/usr/local/tmp/%s' % script_name
        server_name = urlparse.urlparse(self.update_url)[1]
        script_url = 'http://%s/static/%s' % (server_name, script_name)
        fetch_script = 'curl -Ss -o %s %s && head -1 %s' % (
                remote_tmp_script, script_url, remote_tmp_script)

        first_line = self._run(fetch_script).stdout.strip()

        # The downloaded copy is run through its interpreter explicitly
        # rather than executed directly, so the interpreter is read
        # from the `#!` line. Slice off exactly the two-character
        # marker: `str.lstrip('#!')` would strip a *set* of characters
        # rather than the prefix.
        shebang = '#!'
        if first_line.startswith(shebang):
            script_interpreter = first_line[len(shebang):].strip()
            if script_interpreter:
                return '%s %s' % (script_interpreter, remote_tmp_script)
        return None


    def _prepare_host(self):
        """Make sure the target DUT is working and ready for update.

        Initially, the target DUT's state is unknown. The DUT is
        expected to be online, but we strive to be forgiving if Chrome
        and/or the update engine aren't fully functional.

        @raises HostUpdateError if the DUT is not up.
        """
        # Summary of work, and the rationale:
        #  1. Check that the DUT is up, and clear any pending stateful
        #     update request.
        #  2. Reboot, because it's a good way to clear out problems.
        #     (Skipped for servohosts.)
        #  3. Touch the PROVISION_FAILED file, to allow repair to detect
        #     failure later.
        #  4. Run the hook for host class specific preparation.
        #  5. Stop Chrome, because the system is designed to eventually
        #     reboot if Chrome is stuck in a crash loop.
        #  6. Force `update-engine` to start, because if Chrome failed
        #     to start properly, the status of the `update-engine` job
        #     will be uncertain.
        #     (Steps 5 and 6 are skipped for servohosts.)
        if not self.host.is_up():
            raise HostUpdateError(self.host.hostname,
                                  HostUpdateError.DUT_DOWN)
        self._reset_stateful_partition()
        # Servohost reboot logic is handled by themselves.
        if not self._is_servohost:
            self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
        self._run('touch %s' % PROVISION_FAILED)
        self.host.prepare_for_update()
        # Servohost will only update via quick provision.
        if not self._is_servohost:
            self._reset_update_engine()
        logging.info('Updating from version %s to %s.',
                     self.host.get_release_version(),
                     self.update_version)


    def _quick_provision_with_gs_cache(self, provision_command, devserver_name,
                                       image_name):
        """Run quick_provision using GsCache server.

        @param provision_command: The path of quick_provision command.
        @param devserver_name: The devserver name and port (optional).
        @param image_name: The image to be installed.
        """
        logging.info('Try quick provision with gs_cache.')
        # If enabled, GsCache server listens on a different port on the
        # devserver.
        gs_cache_server = devserver_name.replace(DEVSERVER_PORT, GS_CACHE_PORT)
        gs_cache_url = ('http://%s/download/%s'
                        % (gs_cache_server,
                           'chromeos-releases' if self._is_release_bucket
                           else 'chromeos-image-archive'))

        # Check if GS_Cache server is enabled on the server.
        self._run('curl -s -o /dev/null %s' % gs_cache_url)

        command = '%s --noreboot %s %s' % (provision_command, image_name,
                                           gs_cache_url)
        self._run(command)
        metrics.Counter(_metric_name('quick_provision')).increment(
                fields={'devserver': devserver_name, 'gs_cache': True})


    def _quick_provision_with_devserver(self, provision_command,
                                        devserver_name, image_name):
        """Run quick_provision using legacy devserver.

        @param provision_command: The path of quick_provision command.
        @param devserver_name: The devserver name and port (optional).
        @param image_name: The image to be installed.

        @raises error.TestFail if staging artifacts on the devserver
                fails.
        """
        logging.info('Try quick provision with devserver.')
        ds = dev_server.ImageServer('http://%s' % devserver_name)
        archive_url = ('gs://chromeos-releases/%s' % image_name
                       if self._is_release_bucket else None)
        try:
            ds.stage_artifacts(image_name, ['quick_provision', 'stateful',
                                            'autotest_packages'],
                               archive_url=archive_url)
        except dev_server.DevServerException as e:
            # six.reraise() needs an exception *instance* as the value:
            # on Python 3 it reads `value.__traceback__`, which a plain
            # string does not have.
            six.reraise(error.TestFail, error.TestFail(str(e)),
                        sys.exc_info()[2])

        static_url = 'http://%s/static' % devserver_name
        command = '%s --noreboot %s %s' % (provision_command, image_name,
                                           static_url)
        self._run(command)
        metrics.Counter(_metric_name('quick_provision')).increment(
                fields={'devserver': devserver_name, 'gs_cache': False})


    def _install_update(self):
        """Install an update using the `quick-provision` script.

        This uses the `quick-provision` script to download and install
        a root FS, kernel and stateful filesystem content. GsCache is
        tried first, falling back to the devserver if that fails.

        If installation fails, the boot partition, the stateful
        partition and the update engine are reset, and the failure is
        then re-raised so that the caller can report it.

        @return The kernel expected to be booted next.

        @raises error.TestFail if no runnable `quick-provision` script
                could be found or downloaded.
        @raises Exception whatever error caused the installation to fail.
        """
        logging.info('Installing image at %s onto %s',
                     self.update_url, self.host.hostname)
        server_name = urlparse.urlparse(self.update_url)[1]
        image_name = url_to_image_name(self.update_url)

        logging.info('Installing image using quick-provision.')
        provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
        if not provision_command:
            # Without this check the literal text "None" would be
            # interpolated into the shell command below.
            raise error.TestFail(
                    'Unable to find or download a runnable %s script'
                    % _QUICK_PROVISION_SCRIPT)
        try:
            try:
                self._quick_provision_with_gs_cache(provision_command,
                                                    server_name, image_name)
            except Exception as e:
                logging.error('Failed to quick-provision with gscache with '
                              'error %s', e)
                self._quick_provision_with_devserver(provision_command,
                                                     server_name, image_name)

            self._set_target_version()
            return kernel_utils.verify_kernel_state_after_update(self.host)
        except Exception:
            # N.B. We handle only `Exception` here. Non-Exception
            # classes (such as KeyboardInterrupt) are handled by our
            # caller.
            logging.exception('quick-provision script failed;')
            self._revert_boot_partition()
            self._reset_stateful_partition()
            self._reset_update_engine()
            # Propagate the failure. Returning normally here would let
            # `run_update()` carry on as though the image had been
            # installed.
            raise


    def _complete_update(self, expected_kernel):
        """Finish the update, and confirm that it succeeded.

        Initial condition is that the target build has been downloaded
        and installed on the DUT, but has not yet been booted. This
        function is responsible for rebooting the DUT, and checking that
        the new build is running successfully.

        @param expected_kernel: kernel expected to be active after reboot.
        """
        # Regarding the 'crossystem' command below: In some cases,
        # the update flow puts the TPM into a state such that it
        # fails verification. We don't know why. However, this
        # call papers over the problem by clearing the TPM during
        # the reboot.
        #
        # We ignore failures from 'crossystem'. Although failure
        # here is unexpected, and could signal a bug, the point of
        # the exercise is to paper over problems; allowing this to
        # fail would defeat the purpose.
        self._run('crossystem clear_tpm_owner_request=1',
                  ignore_status=True)
        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

        # Touch the lab machine file to leave a marker that
        # distinguishes this image from other test images.
        # Afterwards, we must re-run the autoreboot script because
        # it depends on the _LAB_MACHINE_FILE.
        autoreboot_cmd = ('FILE="%s" ; [ -f "$FILE" ] || '
                          '( touch "$FILE" ; start autoreboot )')
        self._run(autoreboot_cmd % _LAB_MACHINE_FILE)
        try:
            kernel_utils.verify_boot_expectations(
                    expected_kernel, NewBuildUpdateError.ROLLBACK_FAILURE,
                    self.host)
        except Exception:
            # When the system is rolled back, the provision_failed file is
            # removed. So add it back here and re-raise the exception.
            self._run('touch %s' % PROVISION_FAILED)
            raise

        logging.debug('Cleaning up old autotest directories.')
        try:
            installed_autodir = autotest.Autotest.get_installed_autodir(
                    self.host)
            self._run('rm -rf ' + installed_autodir)
        except autotest.AutodirNotFoundError:
            logging.debug('No autotest installed directory found.')


    def run_update(self):
        """Perform a full update of a DUT in the test lab.

        This downloads and installs the root FS and stateful partition
        content needed for the update specified in `self.host` and
        `self.update_url`. The update is performed according to the
        requirements for provisioning a DUT for testing the requested
        build.

        A counter metric is incremented at the start of the procedure.

        @returns A tuple of the form `(image_name, attributes)`, where
            `image_name` is the name of the image installed, and
            `attributes` is new attributes to be applied to the DUT.

        @raises HostUpdateError if preparing the host fails.
        @raises ImageInstallError if download and install fails.
        @raises NewBuildUpdateError if the new build fails after the
                install. (Not applicable to servohosts, which skip the
                post-update step.)
        """
        server_name = dev_server.get_resolved_hostname(self.update_url)
        metrics.Counter(_metric_name('install')).increment(
                fields={'devserver': server_name})

        # In each phase below, errors that already carry an attribution
        # pass through untouched; anything else is wrapped in the
        # attributed error type for that phase.
        try:
            self._prepare_host()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure preparing host prior to update.')
            raise HostUpdateError(self.host.hostname, str(e))

        try:
            expected_kernel = self._install_update()
        except _AttributedUpdateError:
            raise
        except Exception as e:
            logging.exception('Failure during download and install.')
            raise ImageInstallError(self.host.hostname, server_name, str(e))

        # Servohost will handle post update process themselves.
        if not self._is_servohost:
            try:
                self._complete_update(expected_kernel)
            except _AttributedUpdateError:
                raise
            except Exception as e:
                logging.exception('Failure from build after update.')
                raise NewBuildUpdateError(self.update_version, str(e))

        image_name = url_to_image_name(self.update_url)
        # update_url is different from devserver url needed to stage autotest
        # packages, therefore, resolve a new devserver url here.
        devserver_url = dev_server.ImageServer.resolve(
                image_name, self.host.hostname).url()
        repo_url = tools.get_package_url(devserver_url, image_name)
        return image_name, {ds_constants.JOB_REPO_URL: repo_url}