blob: 5bcc88fc7991e0cdb5decb468d00328b73c1d581 [file] [log] [blame]
Chris Sosa5e4246b2012-05-22 18:05:22 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Sean O'Connor5346e4e2010-08-12 18:49:24 +02002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Sean O'Connor5346e4e2010-08-12 18:49:24 +02005import logging
6import re
Congbin Guo63ae0302019-08-12 16:37:49 -07007import sys
Prashanth B32baa9b2014-03-13 13:23:01 -07008import urllib2
Richard Barnette0beb14b2018-05-15 18:07:52 +00009import urlparse
Sean O'Connor5346e4e2010-08-12 18:49:24 +020010
Chris Sosa65425082013-10-16 13:26:22 -070011from autotest_lib.client.bin import utils
Dale Curtis5c32c722011-05-04 19:24:23 -070012from autotest_lib.client.common_lib import error, global_config
Prashanth B32baa9b2014-03-13 13:23:01 -070013from autotest_lib.client.common_lib.cros import dev_server
David Haddock77b75c32020-05-14 01:56:32 -070014from autotest_lib.client.common_lib.cros import kernel_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000015from autotest_lib.server import autotest
Shelley Chen61d28982016-10-28 09:40:20 -070016from autotest_lib.server import utils as server_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000017from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
18from autotest_lib.server.cros.dynamic_suite import tools
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -080019from chromite.lib import retry_util
Dan Shif3a35f72016-01-25 11:18:14 -080020
Shelley Chen16b8df32016-10-27 16:24:21 -070021try:
22 from chromite.lib import metrics
Dan Shi5e2efb72017-02-07 11:40:23 -080023except ImportError:
24 metrics = utils.metrics_mock
Sean O'Connor5346e4e2010-08-12 18:49:24 +020025
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070026
Richard Barnette621a8e42018-06-25 17:34:11 -070027def _metric_name(base_name):
28 return 'chromeos/autotest/provision/' + base_name
29
30
# Values of CURRENT_OP as reported by `update_engine_client -status`.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is
# triggered.
UPDATER_PROCESSING_UPDATE = [
    'UPDATE_STATUS_CHECKING_FOR_UPDATE',
    'UPDATE_STATUS_UPDATE_AVAILABLE',
    'UPDATE_STATUS_DOWNLOADING',
    'UPDATE_STATUS_FINALIZING',
    'UPDATE_STATUS_VERIFYING',
    'UPDATE_STATUS_REPORTING_ERROR_EVENT',
    'UPDATE_STATUS_ATTEMPTING_ROLLBACK',
]


# Names of the helper scripts looked up on the DUT (or fetched from the
# devserver when missing).
_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
_QUICK_PROVISION_SCRIPT = 'quick-provision'

_UPDATER_BIN = '/usr/bin/update_engine_client'
_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

# PROVISION_FAILED - A flag file to indicate provision failures.  The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater._prepare_host()`).  The file's location in
# stateful means that on successful update it will be removed.  Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
PROVISION_FAILED = '/var/tmp/provision_failed'


# A flag file used to enable special handling in lab DUTs.  Some
# parts of the system in Chromium OS test images will behave in ways
# convenient to the test lab when this file is present.  Generally,
# we create this immediately after any update completes.
_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'


# _TARGET_VERSION - A file containing the new version to which we plan
# to update.  This file is used by the CrOS shutdown code to detect and
# handle certain version downgrade cases.  Specifically:  Downgrading
# may trigger an unwanted powerwash in the target build when the
# following conditions are met:
#  * Source build is a v4.4 kernel with R69-10756.0.0 or later.
#  * Target build predates the R69-10756.0.0 cutoff.
# When this file is present and indicates a downgrade, the OS shutdown
# code on the DUT knows how to prevent the powerwash.
_TARGET_VERSION = '/run/update_target_version'


# _REBOOT_FAILURE_MESSAGE - This is the standard message text returned
# when the Host.reboot() method fails.  The source of this text comes
# from `wait_for_restart()` in client/common_lib/hosts/base_classes.py.

_REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'


# Port strings swapped in `_quick_provision_with_gs_cache()` to derive
# the GsCache server address from the devserver address.
DEVSERVER_PORT = '8082'
GS_CACHE_PORT = '8888'
87
88
class RootFSUpdateError(error.TestFail):
    """Signals that updating the root filesystem did not succeed."""
Chris Sosa77556d82012-04-05 15:23:14 -070091
92
class StatefulUpdateError(error.TestFail):
    """Signals that updating the stateful partition did not succeed."""
Chris Sosa77556d82012-04-05 15:23:14 -070095
96
class _AttributedUpdateError(error.TestFail):
    """Base class for update failures that name a likely culprit.

    Subclasses supply `_SUMMARY` (a string) and `_CLASSIFIERS` (a list
    of `(regex, label)` pairs); both are read by `failure_summary`.
    """

    def __init__(self, attribution, msg):
        """Record the failure.

        @param attribution: Text describing what is blamed for the failure.
        @param msg: Text describing the failure itself.
        """
        full_text = '%s: %s' % (attribution, msg)
        super(_AttributedUpdateError, self).__init__(full_text)
        # Kept separately so classification looks only at the raw
        # message, not at the attribution prefix.
        self._message = msg

    def _classify(self):
        """Return the label of the first matching classifier, or `None`.

        Patterns are applied with `re.match`, i.e. anchored at the
        start of the message.
        """
        hits = (label for pattern, label in self._CLASSIFIERS
                if re.match(pattern, self._message))
        return next(hits, None)

    @property
    def failure_summary(self):
        """Summarize this error for metrics reporting."""
        label = self._classify()
        if not label:
            return self._SUMMARY
        return '%s: %s' % (self._SUMMARY, label)
Richard Barnette9d43e562018-06-05 17:20:10 +0000119
120
class HostUpdateError(_AttributedUpdateError):
    """Update failure blamed on the DUT itself.

    Raise this when the most probable cause is a condition that was
    already present on the DUT before the update started, e.g. faulty
    hardware or a bug in the software the DUT was running.
    """

    DUT_DOWN = 'No answer to ssh'

    _SUMMARY = 'DUT failed prior to update'
    _CLASSIFIERS = [
        (DUT_DOWN, DUT_DOWN),
        (_REBOOT_FAILURE_MESSAGE, 'Reboot failed'),
    ]

    def __init__(self, hostname, msg):
        """
        @param hostname: Name of the DUT being updated.
        @param msg: Description of the failure.
        """
        attribution = 'Error on %s prior to update' % hostname
        super(HostUpdateError, self).__init__(attribution, msg)
140
141
class DevServerError(_AttributedUpdateError):
    """Update failure blamed on the devserver.

    Raise this when the most probable cause is the devserver that
    serves the target image for the update.
    """

    _SUMMARY = 'Devserver failed prior to update'
    _CLASSIFIERS = []

    def __init__(self, devserver, msg):
        """
        @param devserver: Identifies the devserver being blamed.
        @param msg: Description of the failure.
        """
        attribution = 'Devserver error on %s' % devserver
        super(DevServerError, self).__init__(attribution, msg)
155
156
class ImageInstallError(_AttributedUpdateError):
    """Update failure while installing the image from the devserver.

    Raise this when the DUT fails to download and install the target
    image and the fault could lie with either the devserver or the DUT.
    """

    _SUMMARY = 'Image failed to download and install'
    _CLASSIFIERS = []

    def __init__(self, hostname, devserver, msg):
        """
        @param hostname: Name of the DUT being updated.
        @param devserver: Identifies the devserver serving the image.
        @param msg: Description of the failure.
        """
        attribution = ('Download and install failed from %s onto %s'
                       % (devserver, hostname))
        super(ImageInstallError, self).__init__(attribution, msg)
172
173
class NewBuildUpdateError(_AttributedUpdateError):
    """Update failure blamed on the target build.

    Raise this when updating to a new build fails and the most probable
    cause is a bug in the freshly installed target build.
    """

    CHROME_FAILURE = 'Chrome failed to reach login screen'
    UPDATE_ENGINE_FAILURE = ('update-engine failed to call '
                             'chromeos-setgoodkernel')
    ROLLBACK_FAILURE = 'System rolled back to previous build'

    _SUMMARY = 'New build failed'
    _CLASSIFIERS = [
        (CHROME_FAILURE, 'Chrome did not start'),
        (UPDATE_ENGINE_FAILURE, 'update-engine did not start'),
        (ROLLBACK_FAILURE, ROLLBACK_FAILURE),
    ]

    def __init__(self, update_version, msg):
        """
        @param update_version: Version of the build being installed.
        @param msg: Description of the failure.
        """
        attribution = 'Failure in build %s' % update_version
        super(NewBuildUpdateError, self).__init__(attribution, msg)

    # NOTE(review): this override returns a fixed string, so `_SUMMARY`
    # and `_CLASSIFIERS` above are never consulted via
    # `failure_summary` for this class -- confirm that is intended.
    @property
    def failure_summary(self):
        #pylint: disable=missing-docstring
        return 'Build failed to work after installing'
202
Richard Barnette9d43e562018-06-05 17:20:10 +0000203
Richard Barnette3e8b2282018-05-15 20:42:20 +0000204def _url_to_version(update_url):
Dan Shi0f466e82013-02-22 15:44:58 -0800205 """Return the version based on update_url.
206
207 @param update_url: url to the image to update to.
208
209 """
Dale Curtisddfdb942011-07-14 13:59:24 -0700210 # The Chrome OS version is generally the last element in the URL. The only
211 # exception is delta update URLs, which are rooted under the version; e.g.,
212 # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
213 # strip off the au section of the path before reading the version.
Dan Shi5002cfc2013-04-29 10:45:05 -0700214 return re.sub('/au/.*', '',
215 urlparse.urlparse(update_url).path).split('/')[-1].strip()
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200216
217
def url_to_image_name(update_url):
    """Extract the image name from an update URL.

    Given a URL such as
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    the result is lumpy-release/R27-3837.0.0, i.e. the last two
    elements of the URL path.

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    path_elements = urlparse.urlparse(update_url).path.split('/')
    return '/'.join(path_elements[-2:])
Scott Zawalskieadbf702013-03-14 09:23:06 -0400230
231
def get_update_failure_reason(exception):
    """Translate an update exception into a metrics failure reason.

    `exception` is expected to be one raised when
    `ChromiumOSUpdater.run_update` fails.  A falsy `exception` yields
    `None`.

    The set of strings this can return is deliberately small so the
    value can be used as a Monarch metric field without cardinality
    concerns.

    @param exception  Exception to be converted to a failure reason.

    @return A string suitable for use in Monarch metrics, or `None`.
    """
    if not exception:
        return None
    if isinstance(exception, _AttributedUpdateError):
        return exception.failure_summary
    # Anything else is reported by exception class name only.
    return 'Unknown Error: %s' % type(exception).__name__
255
256
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The `image_url_pattern` value from the CROS section of the global
    config is turned into a regular expression by replacing each `%s`
    with a capture group, and that expression is searched for in
    `update_url`.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple with the groups captured from `update_url`; for
        the expected pattern this is (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    # Raw string: `\S` is a regex class, not a string escape.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
280
281
Richard Barnette3e8b2282018-05-15 20:42:20 +0000282def _list_image_dir_contents(update_url):
Prashanth B32baa9b2014-03-13 13:23:01 -0700283 """Lists the contents of the devserver for a given build/update_url.
284
285 @param update_url: An update url. Eg: http://devserver:port/update/build.
286 """
287 if not update_url:
288 logging.warning('Need update_url to list contents of the devserver.')
289 return
290 error_msg = 'Cannot check contents of devserver, update url %s' % update_url
291 try:
292 devserver_url, build = _get_devserver_build_from_update_url(update_url)
293 except ValueError as e:
294 logging.warning('%s: %s', error_msg, e)
295 return
296 devserver = dev_server.ImageServer(devserver_url)
297 try:
298 devserver.list_image_dir(build)
299 # The devserver will retry on URLError to avoid flaky connections, but will
300 # eventually raise the URLError if it persists. All HTTPErrors get
301 # converted to DevServerExceptions.
302 except (dev_server.DevServerException, urllib2.URLError) as e:
303 logging.warning('%s: %s', error_msg, e)
304
305
def _get_metric_fields(update_url):
    """Compute the metric fields describing an update URL.

    The result is attached to autoupdate metrics.  If the build name in
    the URL cannot be parsed, the board, build type and milestone fields
    are reported as empty strings.

    @param update_url  Metrics fields will be calculated from this URL.

    @return A dict with keys `dev_server`, `board`, `build_type` and
            `milestone`.
    """
    image_name = url_to_image_name(update_url)
    try:
        board, build_type, milestone, _ = server_utils.ParseBuildName(
            image_name)
    except server_utils.ParseBuildNameException:
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', image_name)
        board = build_type = milestone = ''
    fields = {'dev_server': dev_server.get_resolved_hostname(update_url)}
    fields['board'] = board
    fields['build_type'] = build_type
    fields['milestone'] = milestone
    return fields
327
328
Richard Barnette3e8b2282018-05-15 20:42:20 +0000329class ChromiumOSUpdater(object):
330 """Chromium OS specific DUT update functionality."""
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700331
def __init__(self, update_url, host=None, interactive=True,
             use_quick_provision=False):
    """Set up an updater for one target URL and host.

    @param update_url: The URL we want the update to use.
    @param host: A client.common_lib.hosts.Host implementation.
    @param interactive: Bool whether we are doing an interactive update.
    @param use_quick_provision: Whether we should attempt to perform
        the update using the quick-provision script.
    """
    self.host = host
    self.interactive = interactive
    self._use_quick_provision = use_quick_provision
    self.update_url = update_url
    # The target version is derived once from the URL.
    self.update_version = _url_to_version(update_url)
Amin Hassanib04420b2020-07-08 18:46:11 +0000347
Richard Barnette3e8b2282018-05-15 20:42:20 +0000348
349 def _run(self, cmd, *args, **kwargs):
350 """Abbreviated form of self.host.run(...)"""
351 return self.host.run(cmd, *args, **kwargs)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700352
353
def check_update_status(self):
    """Query the DUT for the current update engine state.

    Runs `update_engine_client -status`, keeps the CURRENT_OP line
    (e.g. "CURRENT_OP=UPDATE_STATUS_IDLE") and returns the text after
    the last '='.
    """
    status_cmd = '%s -status | grep CURRENT_OP' % _UPDATER_BIN
    result = self.host.run(command=status_cmd)
    _, _, state = result.stdout.strip().rpartition('=')
    return state
363
364
Richard Barnette55d1af82018-05-22 23:40:14 +0000365 def _rootdev(self, options=''):
366 """Returns the stripped output of rootdev <options>.
367
368 @param options: options to run rootdev.
369
370 """
371 return self._run('rootdev %s' % options).stdout.strip()
372
373
def _get_last_update_error(self):
    """Fetch the last autoupdate error as a single line.

    @return Stripped output of `update_engine_client
            --last_attempt_error`, with its lines joined by ', '.
    """
    query = '%s --last_attempt_error' % _UPDATER_BIN
    raw_output = self._run(query).stdout
    return ', '.join(raw_output.strip().split('\n'))
Shuqian Zhaod9992722016-02-29 12:26:38 -0800379
380
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800381 def _base_update_handler_no_retry(self, run_args):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800382 """Base function to handle a remote update ssh call.
383
384 @param run_args: Dictionary of args passed to ssh_host.run function.
Shuqian Zhaod9992722016-02-29 12:26:38 -0800385
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800386 @throws: intercepts and re-throws all exceptions
Shuqian Zhaod9992722016-02-29 12:26:38 -0800387 """
Shuqian Zhaod9992722016-02-29 12:26:38 -0800388 try:
389 self.host.run(**run_args)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800390 except Exception as e:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800391 logging.debug('exception in update handler: %s', e)
392 raise e
Shuqian Zhaod9992722016-02-29 12:26:38 -0800393
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800394
def _base_update_handler(self, run_args, err_msg_prefix=None):
    """Handle a remote update ssh call, possibly with retries.

    @param run_args: Dictionary of args passed to ssh_host.run function.
    @param err_msg_prefix: Prefix of the exception error message.  When
        `None`, the message is just the text of the underlying
        exception.

    @raise RootFSUpdateError if the call still fails after any retry.
    """
    def exception_handler(e):
        """Examines exceptions and returns True if the update handler
        should be retried.

        @param e: the exception intercepted by the retry util.
        """
        return (isinstance(e, error.AutoservSSHTimeout) or
                (isinstance(e, error.GenericHostRunError) and
                 hasattr(e, 'description') and
                 (re.search('ERROR_CODE=37', e.description) or
                  re.search('generic error .255.', e.description))))

    try:
        # Try the update twice (arg 2 is max_retry, not including the first
        # call).  Some exceptions may be caught by the retry handler.
        retry_util.GenericRetry(exception_handler, 1,
                                self._base_update_handler_no_retry,
                                run_args)
    except Exception as e:
        # The prefix defaults to None; concatenating None would raise
        # TypeError here and hide the real failure.
        if err_msg_prefix is None:
            message = str(e)
        else:
            message = err_msg_prefix + ': ' + str(e)
        raise RootFSUpdateError(message)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800422
423
def _wait_for_update_service(self):
    """Make sure the update engine daemon is up and idle.

    The DUT may have just rebooted, in which case the service might not
    have started yet, so the status check is retried a few times.

    @raise RootFSUpdateError if the reported status is not idle.
    """
    def _should_retry(exc):
        """Decide whether the status check is worth repeating.

        An `AutoservRunError` is assumed to mean the update service has
        not started yet.

        @param exc: the exception intercepted by the retry util.
        """
        retryable = isinstance(exc, error.AutoservRunError)
        if retryable:
            logging.debug('update service check exception: %s\n'
                          'retrying...', exc)
        return retryable

    # Retry at most three times, every 5s.
    status = retry_util.GenericRetry(_should_retry, 3,
                                     self.check_update_status,
                                     sleep=5)

    # Anything other than idle is a failure.
    if status == UPDATER_IDLE:
        return
    raise RootFSUpdateError(
            'Update engine status is %s (%s was expected).'
            % (status, UPDATER_IDLE))
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800454
455
Richard Barnette55d1af82018-05-22 23:40:14 +0000456 def _reset_update_engine(self):
457 """Resets the host to prepare for a clean update regardless of state."""
458 self._run('stop ui || true')
459 self._run('stop update-engine || true')
460 self._run('start update-engine')
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800461 self._wait_for_update_service()
462
Richard Barnette55d1af82018-05-22 23:40:14 +0000463
def _reset_stateful_partition(self):
    """Cancel any pending stateful update and drop the target version.

    Runs the stateful update script with `--stateful_change=reset` and
    then removes the `_TARGET_VERSION` file.
    """
    update_script = self._get_stateful_update_script()
    self._run('%s --stateful_change=reset 2>&1' % update_script)
    self._run('rm -f %s' % _TARGET_VERSION)
469
470
def _set_target_version(self):
    """Set the "target version" for the update.

    Writes the version number to the `_TARGET_VERSION` file on the DUT.
    For a version such as "R27-3837.0.0" the number is the text between
    the first and second '-' ("3837.0.0").  A version without any '-'
    (e.g. "0.14.755.0") is written unchanged rather than raising
    IndexError.
    """
    _, dash, remainder = self.update_version.partition('-')
    if dash:
        version_number = remainder.split('-')[0]
    else:
        version_number = self.update_version
    self._run('echo %s > %s' % (version_number, _TARGET_VERSION))
Richard Barnette55d1af82018-05-22 23:40:14 +0000475
476
477 def _revert_boot_partition(self):
478 """Revert the boot partition."""
479 part = self._rootdev('-s')
480 logging.warning('Reverting update; Boot partition will be %s', part)
481 return self._run('/postinst %s 2>&1' % part)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700482
483
def _verify_update_completed(self):
    """Check that the update engine finished and wants a reboot.

    @raise RootFSUpdateError if the DUT doesn't indicate that
            download is complete and the DUT is ready for reboot.

    @return The result of
            `kernel_utils.verify_kernel_state_after_update()`.
    """
    status = self.check_update_status()
    if status == UPDATER_NEED_REBOOT:
        return kernel_utils.verify_kernel_state_after_update(self.host)

    # The last error is only looked up when the engine is back at idle.
    detail = ''
    if status == UPDATER_IDLE:
        detail = 'Update error: %s' % self._get_last_update_error()
    raise RootFSUpdateError(
            'Update engine status is %s (%s was expected).  %s'
            % (status, UPDATER_NEED_REBOOT, detail))
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700499
500
def trigger_update(self):
    """Start an update check in the background.

    A trigger counter metric is incremented whether or not the trigger
    succeeds; its `success` field records the outcome.
    """
    # This can be called right after a reboot, when the update engine
    # may not be running yet, so wait for it first.
    self._wait_for_update_service()

    autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                      (_UPDATER_BIN, self.update_url))
    err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
    logging.info('Triggering update via: %s', autoupdate_cmd)
    succeeded = False
    try:
        self._base_update_handler({'command': autoupdate_cmd}, err_prefix)
        succeeded = True
    finally:
        counter = metrics.Counter('chromeos/autotest/autoupdater/trigger')
        fields = {'success': succeeded}
        fields.update(_get_metric_fields(self.update_url))
        counter.increment(fields=fields)
521
522
def update_image(self):
    """Update the device root FS and kernel, then verify the result.

    An update counter metric is incremented whether or not the install
    command succeeds; its `success` field records the outcome.

    @return The result of `_verify_update_completed()`.
    """
    autoupdate_cmd = ('%s --update --omaha_url=%s' %
                      (_UPDATER_BIN, self.update_url))
    if not self.interactive:
        autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
    err_prefix = ('Failed to install device image using payload at %s '
                  'on %s. ' % (self.update_url, self.host.hostname))
    logging.info('Updating image via: %s', autoupdate_cmd)
    succeeded = False
    try:
        self._base_update_handler(
                {'command': autoupdate_cmd, 'timeout': 3600}, err_prefix)
        succeeded = True
    finally:
        counter = metrics.Counter('chromeos/autotest/autoupdater/update')
        fields = {'success': succeeded}
        fields.update(_get_metric_fields(self.update_url))
        counter.increment(fields=fields)
    return self._verify_update_completed()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700542
543
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700544 def _get_remote_script(self, script_name):
545 """Ensure that `script_name` is present on the DUT.
Chris Sosa5e4246b2012-05-22 18:05:22 -0700546
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700547 The given script (e.g. `stateful_update`) may be present in the
548 stateful partition under /usr/local/bin, or we may have to
549 download it from the devserver.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700550
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700551 Determine whether the script is present or must be downloaded
552 and download if necessary. Then, return a command fragment
553 sufficient to run the script from whereever it now lives on the
554 DUT.
Richard Barnette9d43e562018-06-05 17:20:10 +0000555
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700556 @param script_name The name of the script as expected in
557 /usr/local/bin and on the devserver.
558 @return A string with the command (minus arguments) that will
559 run the target script.
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700560 """
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700561 remote_script = '/usr/local/bin/%s' % script_name
562 if self.host.path_exists(remote_script):
563 return remote_script
Laurence Goodby06fb42c2020-02-29 17:14:42 -0800564 self.host.run('mkdir -p -m 1777 /usr/local/tmp')
565 remote_tmp_script = '/usr/local/tmp/%s' % script_name
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700566 server_name = urlparse.urlparse(self.update_url)[1]
567 script_url = 'http://%s/static/%s' % (server_name, script_name)
Dana Goyette353d1d92019-06-27 10:43:59 -0700568 fetch_script = 'curl -Ss -o %s %s && head -1 %s' % (
569 remote_tmp_script, script_url, remote_tmp_script)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700570
Dana Goyette353d1d92019-06-27 10:43:59 -0700571 first_line = self._run(fetch_script).stdout.strip()
572
573 if first_line and first_line.startswith('#!'):
574 script_interpreter = first_line.lstrip('#!')
575 if script_interpreter:
576 return '%s %s' % (script_interpreter, remote_tmp_script)
577 return None
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700578
def _get_stateful_update_script(self):
    """Return a command that runs the stateful update script.

    Uses `_get_remote_script()` to locate `stateful_update` on the DUT
    or install it there.

    @raise StatefulUpdateError if the script can't be found or
            installed.
    @return A string that can be joined with arguments to run the
            `stateful_update` command on the DUT.
    """
    command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
    if command:
        return command
    raise StatefulUpdateError('Could not install %s on DUT'
                              % _STATEFUL_UPDATE_SCRIPT)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700595
596
def update_stateful(self, clobber=True):
    """Update the stateful partition from the devserver.

    @param clobber: If True, a clean stateful installation.

    @raise StatefulUpdateError if the update script fails to
            complete successfully.
    """
    logging.info('Updating stateful partition...')
    # NOTE(review): this replaces every occurrence of 'update' in the
    # URL, not only the path component -- confirm no devserver or build
    # name can contain that substring.
    static_url = self.update_url.replace('update', 'static')

    # The stateful update has to succeed, otherwise the freshly
    # installed host would not be testable afterwards.
    cmd_parts = [self._get_stateful_update_script(), static_url]
    if clobber:
        cmd_parts += ['--stateful_change=clean']
    cmd_parts += ['2>&1']

    full_cmd = ' '.join(cmd_parts)
    try:
        self._run(full_cmd, timeout=1200)
    except error.AutoservRunError:
        raise StatefulUpdateError(
                'Failed to perform stateful update on %s' %
                self.host.hostname)
Dale Curtis5c32c722011-05-04 19:24:23 -0700621
Chris Sosaa3ac2152012-05-23 22:23:13 -0700622
def _prepare_host(self):
    """Get the target DUT into a state where an update can start.

    Nothing is assumed about the DUT's initial state beyond it being
    online; Chrome and/or the update engine may not be fully
    functional.

    @raise HostUpdateError if the DUT is not up.
    """
    # What happens below, and why:
    #  1. Clear any pending stateful update request.
    #  2. Reboot, because it's a good way to clear out problems.
    #  3. Touch the PROVISION_FAILED file, to allow repair to detect
    #     failure later.
    #  4. Run the hook for host class specific preparation.
    #  5. Stop Chrome (the system is designed to eventually reboot if
    #     Chrome is stuck in a crash loop) and force `update-engine`
    #     to start, since its status is uncertain if Chrome failed to
    #     start properly.  Both happen in `_reset_update_engine()`.
    dut = self.host
    if not dut.is_up():
        raise HostUpdateError(dut.hostname, HostUpdateError.DUT_DOWN)
    self._reset_stateful_partition()
    dut.reboot(timeout=dut.REBOOT_TIMEOUT)
    self._run('touch %s' % PROVISION_FAILED)
    dut.prepare_for_update()
    self._reset_update_engine()
    current_version = dut.get_release_version()
    logging.info('Updating from version %s to %s.',
                 current_version, self.update_version)
651
Amin Hassanic0722962020-07-14 19:37:45 +0000652
def _install_via_update_engine(self):
    """Install an update using the production AU flow.

    Runs `update_image()` followed by `update_stateful()` to
    download and install a root FS, kernel and stateful filesystem
    content, then records the target version.

    @return The kernel expected to be booted next, as reported by
            `update_image()`.
    """
    logging.info('Installing image using update_engine.')
    kernel_to_boot = self.update_image()
    # The stateful update and the target-version marker only make
    # sense once the image itself has been installed.
    self.update_stateful()
    self._set_target_version()
    return kernel_to_boot
667
668
def _quick_provision_with_gs_cache(self, provision_command, devserver_name,
                                   image_name):
    """Run quick_provision, fetching payloads through a GsCache server.

    @param provision_command: The path of quick_provision command.
    @param devserver_name: The devserver name and port (optional).
    @param image_name: The image to be installed.
    """
    logging.info('Try quick provision with gs_cache.')
    # When enabled, the GsCache server listens on a different port
    # of the devserver, so swap the port in the server name.
    cache_host = devserver_name.replace(DEVSERVER_PORT, GS_CACHE_PORT)
    gs_cache_url = 'http://%s/download/chromeos-image-archive' % cache_host

    # Probe the URL from the DUT to see whether the GsCache server
    # is enabled; presumably `_run` raises when curl fails, which
    # lets the caller fall back - confirm against `_run`.
    self._run('curl -s -o /dev/null %s' % gs_cache_url)

    self._run('%s --noreboot %s %s'
              % (provision_command, image_name, gs_cache_url))

    counter = metrics.Counter(_metric_name('quick_provision'))
    counter.increment(
            fields={'devserver': devserver_name, 'gs_cache': True})
692
693
694 def _quick_provision_with_devserver(self, provision_command,
695 devserver_name, image_name):
696 """Run quick_provision using legacy devserver.
697
698 @param provision_command: The path of quick_provision command.
699 @param devserver_name: The devserver name and port (optional).
700 @param image_name: The image to be installed.
701 """
Congbin Guo63ae0302019-08-12 16:37:49 -0700702 logging.info('Try quick provision with devserver.')
703 ds = dev_server.ImageServer('http://%s' % devserver_name)
704 try:
Amin Hassanic0722962020-07-14 19:37:45 +0000705 ds.stage_artifacts(image_name, ['quick_provision', 'stateful'])
Congbin Guo63ae0302019-08-12 16:37:49 -0700706 except dev_server.DevServerException as e:
707 raise error.TestFail, str(e), sys.exc_info()[2]
708
Congbin Guoeb7aa2d2019-07-15 16:10:44 -0700709 static_url = 'http://%s/static' % devserver_name
710 command = '%s --noreboot %s %s' % (provision_command, image_name,
711 static_url)
Congbin Guoeb7aa2d2019-07-15 16:10:44 -0700712 self._run(command)
713 metrics.Counter(_metric_name('quick_provision')).increment(
714 fields={'devserver': devserver_name, 'gs_cache': False})
715
716
def _install_via_quick_provision(self):
    """Install an update using the `quick-provision` script.

    This uses the `quick-provision` script to download and install
    a root FS, kernel and stateful filesystem content.  The GsCache
    server is tried first; if that attempt fails for any reason the
    legacy devserver is tried instead.

    @return The kernel expected to be booted next, or `None` if
            quick-provision is disabled or failed.  A `None` result
            tells the caller to fall back to another install method.
    """
    if not self._use_quick_provision:
        return None
    image_name = url_to_image_name(self.update_url)
    logging.info('Installing image using quick-provision.')
    provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
    # Element 1 of the parsed URL is the network location, i.e. the
    # devserver's host[:port].
    server_name = urlparse.urlparse(self.update_url)[1]
    try:
        try:
            self._quick_provision_with_gs_cache(provision_command,
                                                server_name, image_name)
        except Exception as e:
            # Record why GsCache was abandoned instead of dropping
            # the failure silently; then retry via the devserver.
            logging.warning('quick-provision with gs_cache failed '
                            '(%s); retrying with devserver.', e)
            self._quick_provision_with_devserver(provision_command,
                                                 server_name, image_name)

        self._set_target_version()
        return kernel_utils.verify_kernel_state_after_update(self.host)
    except Exception:
        # N.B.  We handle only `Exception` here.  Non-Exception
        # classes (such as KeyboardInterrupt) are handled by our
        # caller.
        logging.exception('quick-provision script failed; '
                          'will fall back to update_engine.')
        # Undo whatever the failed attempt may have changed so the
        # fallback starts from a clean state.
        self._revert_boot_partition()
        self._reset_stateful_partition()
        self._reset_update_engine()
        return None
751
752
def _install_update(self):
    """Install the requested image on the DUT, but don't start it.

    Downloads and installs a root FS, kernel and stateful filesystem
    content, preferring quick-provision and falling back to the
    update_engine flow when quick-provision yields no kernel.  The
    DUT is not rebooted here, so the update is merely pending when
    the method returns.

    @return The kernel expected to be booted next.
    """
    logging.info('Installing image at %s onto %s',
                 self.update_url, self.host.hostname)
    try:
        expected_kernel = self._install_via_quick_provision()
        if not expected_kernel:
            expected_kernel = self._install_via_update_engine()
        return expected_kernel
    except:
        # N.B.  This deliberately catches non-Exception classes such
        # as KeyboardInterrupt too.  We need to clean up, but we
        # also must re-raise.
        self._revert_boot_partition()
        self._reset_stateful_partition()
        self._reset_update_engine()
        # Collect update engine logs in the event of failure.
        job = self.host.job
        if job:
            logging.info('Collecting update engine logs due to failure...')
            self.host.get_file(_UPDATER_LOGS, job.sysinfo.sysinfodir,
                               preserve_perm=False)
        # NOTE(review): reconstructed as unconditional (outside the
        # `if` above); confirm against the original indentation.
        _list_image_dir_contents(self.update_url)
        raise
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200782
783
def _complete_update(self, expected_kernel):
    """Reboot into the freshly installed build and confirm it works.

    On entry the target build has been downloaded and installed on
    the DUT, but has not yet been booted.  This method reboots the
    DUT and checks that the new build is running successfully.

    @param expected_kernel: kernel expected to be active after reboot.
    """
    # About the 'crossystem' command: in some cases the update flow
    # leaves the TPM in a state such that it fails verification.
    # We don't know why.  Requesting a TPM clear during the reboot
    # papers over the problem.
    #
    # Failures from 'crossystem' are ignored.  A failure here is
    # unexpected and could signal a bug, but the whole point is to
    # paper over problems; letting this fail would defeat the
    # purpose.
    self._run('crossystem clear_tpm_owner_request=1', ignore_status=True)
    self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.  Afterwards
    # the autoreboot script must be re-run, because it depends on
    # the _LAB_MACHINE_FILE.
    autoreboot_template = ('FILE="%s" ; [ -f "$FILE" ] || '
                           '( touch "$FILE" ; start autoreboot )')
    self._run(autoreboot_template % _LAB_MACHINE_FILE)
    kernel_utils.verify_boot_expectations(
            expected_kernel,
            NewBuildUpdateError.ROLLBACK_FAILURE,
            self.host)

    logging.debug('Cleaning up old autotest directories.')
    try:
        stale_autodir = autotest.Autotest.get_installed_autodir(self.host)
        self._run('rm -rf ' + stale_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
825
826
def run_update(self):
    """Perform a full update of a DUT in the test lab.

    Downloads and installs the root FS and stateful partition
    content needed for the update specified in `self.host` and
    `self.update_url`, following the requirements for provisioning
    a DUT for testing the requested build.

    The work is done in three phases (prepare the host, install the
    image, boot and verify the new build).  An unexpected exception
    in a phase is logged and re-raised as that phase's attributed
    error; `_AttributedUpdateError` instances pass through unchanged.

    An 'install' metric is incremented for the devserver at the
    start of the procedure.

    @returns A tuple of the form `(image_name, attributes)`, where
        `image_name` is the name of the image installed, and
        `attributes` is new attributes to be applied to the DUT.
    """
    server_name = dev_server.get_resolved_hostname(self.update_url)
    install_counter = metrics.Counter(_metric_name('install'))
    install_counter.increment(fields={'devserver': server_name})

    # Phase 1: get the DUT ready.
    try:
        self._prepare_host()
    except _AttributedUpdateError:
        raise
    except Exception as exc:
        logging.exception('Failure preparing host prior to update.')
        raise HostUpdateError(self.host.hostname, str(exc))

    # Phase 2: download and install, without rebooting.
    try:
        expected_kernel = self._install_update()
    except _AttributedUpdateError:
        raise
    except Exception as exc:
        logging.exception('Failure during download and install.')
        raise ImageInstallError(self.host.hostname, server_name, str(exc))

    # Phase 3: reboot into the new build and verify it.
    try:
        self._complete_update(expected_kernel)
    except _AttributedUpdateError:
        raise
    except Exception as exc:
        logging.exception('Failure from build after update.')
        raise NewBuildUpdateError(self.update_version, str(exc))

    image_name = url_to_image_name(self.update_url)
    # update_url is different from the devserver url needed to stage
    # autotest packages, therefore resolve a new devserver url here.
    image_server = dev_server.ImageServer.resolve(image_name,
                                                  self.host.hostname)
    repo_url = tools.get_package_url(image_server.url(), image_name)
    return image_name, {ds_constants.JOB_REPO_URL: repo_url}