blob: 29b0e4e6f8e390185a55ec1c197906dbf189dd72 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
Don Garrett56b1cc82013-12-06 17:49:20 -08005import glob
Sean O'Connor5346e4e2010-08-12 18:49:24 +02006import logging
Dale Curtis5c32c722011-05-04 19:24:23 -07007import os
Sean O'Connor5346e4e2010-08-12 18:49:24 +02008import re
Prashanth B32baa9b2014-03-13 13:23:01 -07009import urllib2
Richard Barnette0beb14b2018-05-15 18:07:52 +000010import urlparse
Sean O'Connor5346e4e2010-08-12 18:49:24 +020011
Chris Sosa65425082013-10-16 13:26:22 -070012from autotest_lib.client.bin import utils
Dale Curtis5c32c722011-05-04 19:24:23 -070013from autotest_lib.client.common_lib import error, global_config
Prashanth B32baa9b2014-03-13 13:23:01 -070014from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette0beb14b2018-05-15 18:07:52 +000015from autotest_lib.server import autotest
Shelley Chen61d28982016-10-28 09:40:20 -070016from autotest_lib.server import utils as server_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000017from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
18from autotest_lib.server.cros.dynamic_suite import tools
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -080019from chromite.lib import retry_util
Dan Shif3a35f72016-01-25 11:18:14 -080020
Shelley Chen16b8df32016-10-27 16:24:21 -070021try:
22 from chromite.lib import metrics
Dan Shi5e2efb72017-02-07 11:40:23 -080023except ImportError:
24 metrics = utils.metrics_mock
Sean O'Connor5346e4e2010-08-12 18:49:24 +020025
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070026
Richard Barnette621a8e42018-06-25 17:34:11 -070027def _metric_name(base_name):
28 return 'chromeos/autotest/provision/' + base_name
29
30
# Status strings reported by `update_engine_client -status`.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is
# triggered.
UPDATER_PROCESSING_UPDATE = [
    'UPDATE_STATUS_CHECKING_FOR_UPDATE',
    'UPDATE_STATUS_UPDATE_AVAILABLE',
    'UPDATE_STATUS_DOWNLOADING',
    'UPDATE_STATUS_FINALIZING',
    'UPDATE_STATUS_VERIFYING',
    'UPDATE_STATUS_REPORTING_ERROR_EVENT',
    'UPDATE_STATUS_ATTEMPTING_ROLLBACK',
]


# Names of the helper scripts, as found in /usr/local/bin on the DUT
# or under /static on the devserver.
_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
_QUICK_PROVISION_SCRIPT = 'quick-provision'

_UPDATER_BIN = '/usr/bin/update_engine_client'
_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

# Partition numbers of the kernel and root filesystem for each of the
# two kernel slots.
_KERNEL_A = dict(name='KERN-A', kernel=2, root=3)
_KERNEL_B = dict(name='KERN-B', kernel=4, root=5)

# Time to wait for new kernel to be marked successful after
# auto update.
_KERNEL_UPDATE_TIMEOUT = 120


# PROVISION_FAILED - A flag file to indicate provision failures.  The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater._prepare_host()`).  The file's location in
# stateful means that on successful update it will be removed.  Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
PROVISION_FAILED = '/var/tmp/provision_failed'


# A flag file used to enable special handling in lab DUTs.  Some
# parts of the system in Chromium OS test images will behave in ways
# convenient to the test lab when this file is present.  Generally,
# we create this immediately after any update completes.
_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'


# _TARGET_VERSION - A file containing the new version to which we plan
# to update.  This file is used by the CrOS shutdown code to detect and
# handle certain version downgrade cases.  Specifically:  Downgrading
# may trigger an unwanted powerwash in the target build when the
# following conditions are met:
#  * Source build is a v4.4 kernel with R69-10756.0.0 or later.
#  * Target build predates the R69-10756.0.0 cutoff.
# When this file is present and indicates a downgrade, the OS shutdown
# code on the DUT knows how to prevent the powerwash.
_TARGET_VERSION = '/run/update_target_version'


# _REBOOT_FAILURE_MESSAGE - This is the standard message text returned
# when the Host.reboot() method fails.  The source of this text comes
# from `wait_for_restart()` in client/common_lib/hosts/base_classes.py.

_REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'


# Port numbers, kept as strings.
DEVSERVER_PORT = '8082'
GS_CACHE_PORT = '8888'
95
96
class RootFSUpdateError(error.TestFail):
    """Raised when updating the root filesystem fails."""
Chris Sosa77556d82012-04-05 15:23:14 -070099
100
class StatefulUpdateError(error.TestFail):
    """Raised when updating the stateful partition fails."""
Chris Sosa77556d82012-04-05 15:23:14 -0700103
104
class _AttributedUpdateError(error.TestFail):
    """Base class for update failures that carry an attributed cause.

    The methods here read two class attributes that this class does not
    define itself, so subclasses must supply them:
      * `_SUMMARY` - summary text for this kind of failure.
      * `_CLASSIFIERS` - sequence of `(pattern, classification)` pairs;
        each pattern is applied with `re.match()` to the message that
        was passed to the constructor.
    """

    def __init__(self, attribution, msg):
        """Initialize with text formatted as '<attribution>: <msg>'.

        @param attribution: Text naming what the failure is blamed on.
        @param msg: Description of the underlying failure.
        """
        full_text = '%s: %s' % (attribution, msg)
        super(_AttributedUpdateError, self).__init__(full_text)
        # Keep the bare message; classification matches against it
        # rather than against the attributed text.
        self._message = msg

    def _classify(self):
        """Return the classification of the first matching pattern.

        @return The classification paired with the first pattern in
                `_CLASSIFIERS` that matches the message, or `None`
                when no pattern matches.
        """
        hits = (label for pattern, label in self._CLASSIFIERS
                if re.match(pattern, self._message))
        return next(hits, None)

    @property
    def failure_summary(self):
        """Summarize this error for metrics reporting."""
        label = self._classify()
        if not label:
            return self._SUMMARY
        return '%s: %s' % (self._SUMMARY, label)
Richard Barnette9d43e562018-06-05 17:20:10 +0000127
128
class HostUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the DUT.

    Raise this when the most likely cause of failure is a condition
    that existed on the DUT before the update, such as a hardware
    problem or a bug in the software already on the DUT.
    """

    DUT_DOWN = 'No answer to ssh'

    _SUMMARY = 'DUT failed prior to update'
    _CLASSIFIERS = [(DUT_DOWN, DUT_DOWN),
                    (_REBOOT_FAILURE_MESSAGE, 'Reboot failed')]

    def __init__(self, hostname, msg):
        """Initialize the error.

        @param hostname: Name of the DUT the failure is attributed to.
        @param msg: Description of the underlying failure.
        """
        attribution = 'Error on %s prior to update' % hostname
        super(HostUpdateError, self).__init__(attribution, msg)
148
149
class DevServerError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the devserver.

    Raise this when the most likely cause of failure is the devserver
    that serves the target image for the update.
    """

    _SUMMARY = 'Devserver failed prior to update'
    _CLASSIFIERS = []

    def __init__(self, devserver, msg):
        """Initialize the error.

        @param devserver: The devserver the failure is attributed to.
        @param msg: Description of the underlying failure.
        """
        attribution = 'Devserver error on %s' % devserver
        super(DevServerError, self).__init__(attribution, msg)
163
164
class ImageInstallError(_AttributedUpdateError):
    """Failure updating a DUT when installing from the devserver.

    Raise this when the target DUT fails to download and install the
    target image from the devserver, and either the devserver or the
    DUT might be at fault.
    """

    _SUMMARY = 'Image failed to download and install'
    _CLASSIFIERS = []

    def __init__(self, hostname, devserver, msg):
        """Initialize the error.

        @param hostname: Name of the DUT being updated.
        @param devserver: The devserver the image was served from.
        @param msg: Description of the underlying failure.
        """
        attribution = ('Download and install failed from %s onto %s'
                       % (devserver, hostname))
        super(ImageInstallError, self).__init__(attribution, msg)
180
181
class NewBuildUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the target build.

    Raise this when updating to a new build fails and the most likely
    cause of the failure is a bug in the newly installed target build.
    """

    CHROME_FAILURE = 'Chrome failed to reach login screen'
    UPDATE_ENGINE_FAILURE = ('update-engine failed to call '
                             'chromeos-setgoodkernel')
    ROLLBACK_FAILURE = 'System rolled back to previous build'

    _SUMMARY = 'New build failed'
    _CLASSIFIERS = [(CHROME_FAILURE, 'Chrome did not start'),
                    (UPDATE_ENGINE_FAILURE, 'update-engine did not start'),
                    (ROLLBACK_FAILURE, ROLLBACK_FAILURE)]

    def __init__(self, update_version, msg):
        """Initialize the error.

        @param update_version: Version of the build being installed.
        @param msg: Description of the underlying failure.
        """
        attribution = 'Failure in build %s' % update_version
        super(NewBuildUpdateError, self).__init__(attribution, msg)

    # NOTE(review): this override returns a fixed string, so `_SUMMARY`
    # and `_CLASSIFIERS` above are never consulted for this class.
    # Confirm whether the override is still wanted.
    @property
    def failure_summary(self):
        #pylint: disable=missing-docstring
        return 'Build failed to work after installing'
210
Richard Barnette9d43e562018-06-05 17:20:10 +0000211
Richard Barnette3e8b2282018-05-15 20:42:20 +0000212def _url_to_version(update_url):
Dan Shi0f466e82013-02-22 15:44:58 -0800213 """Return the version based on update_url.
214
215 @param update_url: url to the image to update to.
216
217 """
Dale Curtisddfdb942011-07-14 13:59:24 -0700218 # The Chrome OS version is generally the last element in the URL. The only
219 # exception is delta update URLs, which are rooted under the version; e.g.,
220 # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
221 # strip off the au section of the path before reading the version.
Dan Shi5002cfc2013-04-29 10:45:05 -0700222 return re.sub('/au/.*', '',
223 urlparse.urlparse(update_url).path).split('/')[-1].strip()
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200224
225
def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    url_path = urlparse.urlparse(update_url).path
    # Only the last two path components make up the image name.
    trailing = url_path.rsplit('/', 2)[-2:]
    return '/'.join(trailing)
238
239
def get_update_failure_reason(exception):
    """Convert an exception into a failure reason for metrics.

    The passed in `exception` should be one raised by failure of
    `ChromiumOSUpdater.run_update`.  The returned string will describe
    the failure.  If the input exception value is not a truish value
    the return value will be `None`.

    The number of possible return strings is restricted to a limited
    enumeration of values so that the string may be safely used in
    Monarch metrics without worrying about cardinality of the range of
    string values.

    @param exception  Exception to be converted to a failure reason.

    @return A string suitable for use in Monarch metrics, or `None`.
    """
    if not exception:
        return None
    if isinstance(exception, _AttributedUpdateError):
        return exception.failure_summary
    # Anything else is reported by exception class name only.
    return 'Unknown Error: %s' % type(exception).__name__
263
264
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The `image_url_pattern` setting from the `CROS` section of the
    global config is turned into a regular expression by replacing
    each '%s' with a group matching a run of non-whitespace
    characters, and that expression is searched for in `update_url`.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: The groups captured from `update_url`; on a match this is
        the tuple (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected
        pattern.
    @raises ValueError: If no global_config was found, or it doesn't
        contain an image_url_pattern.
    """
    url_pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not url_pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    match = re.search(url_pattern.replace('%s', r'(\S+)'), update_url)
    if match is None or len(match.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return match.groups()
288
289
Richard Barnette3e8b2282018-05-15 20:42:20 +0000290def _list_image_dir_contents(update_url):
Prashanth B32baa9b2014-03-13 13:23:01 -0700291 """Lists the contents of the devserver for a given build/update_url.
292
293 @param update_url: An update url. Eg: http://devserver:port/update/build.
294 """
295 if not update_url:
296 logging.warning('Need update_url to list contents of the devserver.')
297 return
298 error_msg = 'Cannot check contents of devserver, update url %s' % update_url
299 try:
300 devserver_url, build = _get_devserver_build_from_update_url(update_url)
301 except ValueError as e:
302 logging.warning('%s: %s', error_msg, e)
303 return
304 devserver = dev_server.ImageServer(devserver_url)
305 try:
306 devserver.list_image_dir(build)
307 # The devserver will retry on URLError to avoid flaky connections, but will
308 # eventually raise the URLError if it persists. All HTTPErrors get
309 # converted to DevServerExceptions.
310 except (dev_server.DevServerException, urllib2.URLError) as e:
311 logging.warning('%s: %s', error_msg, e)
312
313
def _get_metric_fields(update_url):
    """Return a dict of metric fields.

    This is used for sending autoupdate metrics for the given update URL.
    When the build name in the URL cannot be parsed, the board, build
    type and milestone fields are reported as empty strings.

    @param update_url  Metrics fields will be calculated from this URL.

    @return A dict with the keys 'dev_server', 'board', 'build_type'
            and 'milestone'.
    """
    image_name = url_to_image_name(update_url)
    try:
        board, build_type, milestone, _ = server_utils.ParseBuildName(
                image_name)
    except server_utils.ParseBuildNameException:
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', image_name)
        board = build_type = milestone = ''
    fields = dict(dev_server=dev_server.get_resolved_hostname(update_url))
    fields.update(board=board, build_type=build_type, milestone=milestone)
    return fields
335
336
# TODO(garnold) This implements shared updater functionality needed for
# supporting the autoupdate_EndToEnd server-side test. We should probably
# migrate more of the existing ChromiumOSUpdater functionality to it as we
# expand non-CrOS support in other tests.
class ChromiumOSUpdater(object):
    """Chromium OS specific DUT update functionality."""

    def __init__(self, update_url, host=None, interactive=True,
                 use_quick_provision=False):
        """Initializes the object.

        The target version is derived from `update_url` and stored as
        `update_version`.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param interactive: Bool whether we are doing an interactive update.
        @param use_quick_provision: Whether we should attempt to perform
            the update using the quick-provision script.
        """
        self.host = host
        self.interactive = interactive
        self._use_quick_provision = use_quick_provision
        self.update_url = update_url
        self.update_version = _url_to_version(update_url)
Richard Barnette3e8b2282018-05-15 20:42:20 +0000359
360
361 def _run(self, cmd, *args, **kwargs):
362 """Abbreviated form of self.host.run(...)"""
363 return self.host.run(cmd, *args, **kwargs)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700364
365
366 def check_update_status(self):
367 """Returns the current update engine state.
368
369 We use the `update_engine_client -status' command and parse the line
370 indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".
371 """
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800372 update_status = self.host.run(command='%s -status | grep CURRENT_OP' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000373 _UPDATER_BIN)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700374 return update_status.stdout.strip().split('=')[-1]
375
376
Richard Barnette55d1af82018-05-22 23:40:14 +0000377 def _rootdev(self, options=''):
378 """Returns the stripped output of rootdev <options>.
379
380 @param options: options to run rootdev.
381
382 """
383 return self._run('rootdev %s' % options).stdout.strip()
384
385
386 def get_kernel_state(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000387 """Returns the (<active>, <inactive>) kernel state as a pair.
388
389 @raise RootFSUpdateError if the DUT reports a root partition
390 number that isn't one of the known valid values.
391 """
Richard Barnette55d1af82018-05-22 23:40:14 +0000392 active_root = int(re.findall('\d+\Z', self._rootdev('-s'))[0])
393 if active_root == _KERNEL_A['root']:
394 return _KERNEL_A, _KERNEL_B
395 elif active_root == _KERNEL_B['root']:
396 return _KERNEL_B, _KERNEL_A
397 else:
Richard Barnette9d43e562018-06-05 17:20:10 +0000398 raise RootFSUpdateError(
399 'Encountered unknown root partition: %s' % active_root)
Richard Barnette55d1af82018-05-22 23:40:14 +0000400
401
Richard Barnette18fd5842018-05-25 18:21:14 +0000402 def _cgpt(self, flag, kernel):
403 """Return numeric cgpt value for the specified flag, kernel, device."""
404 return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
405 kernel['kernel'], flag)).stdout.strip())
Richard Barnette55d1af82018-05-22 23:40:14 +0000406
407
408 def _get_next_kernel(self):
409 """Return the kernel that has priority for the next boot."""
410 priority_a = self._cgpt('-P', _KERNEL_A)
411 priority_b = self._cgpt('-P', _KERNEL_B)
412 if priority_a > priority_b:
413 return _KERNEL_A
414 else:
415 return _KERNEL_B
416
417
418 def _get_kernel_success(self, kernel):
419 """Return boolean success flag for the specified kernel.
420
421 @param kernel: information of the given kernel, either _KERNEL_A
422 or _KERNEL_B.
423 """
424 return self._cgpt('-S', kernel) != 0
425
426
427 def _get_kernel_tries(self, kernel):
428 """Return tries count for the specified kernel.
429
430 @param kernel: information of the given kernel, either _KERNEL_A
431 or _KERNEL_B.
432 """
433 return self._cgpt('-T', kernel)
434
435
Richard Barnette3e8b2282018-05-15 20:42:20 +0000436 def _get_last_update_error(self):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800437 """Get the last autoupdate error code."""
Richard Barnette3e8b2282018-05-15 20:42:20 +0000438 command_result = self._run(
439 '%s --last_attempt_error' % _UPDATER_BIN)
440 return command_result.stdout.strip().replace('\n', ', ')
Shuqian Zhaod9992722016-02-29 12:26:38 -0800441
442
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800443 def _base_update_handler_no_retry(self, run_args):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800444 """Base function to handle a remote update ssh call.
445
446 @param run_args: Dictionary of args passed to ssh_host.run function.
Shuqian Zhaod9992722016-02-29 12:26:38 -0800447
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800448 @throws: intercepts and re-throws all exceptions
Shuqian Zhaod9992722016-02-29 12:26:38 -0800449 """
Shuqian Zhaod9992722016-02-29 12:26:38 -0800450 try:
451 self.host.run(**run_args)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800452 except Exception as e:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800453 logging.debug('exception in update handler: %s', e)
454 raise e
Shuqian Zhaod9992722016-02-29 12:26:38 -0800455
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800456
457 def _base_update_handler(self, run_args, err_msg_prefix=None):
458 """Handle a remote update ssh call, possibly with retries.
459
460 @param run_args: Dictionary of args passed to ssh_host.run function.
461 @param err_msg_prefix: Prefix of the exception error message.
462 """
463 def exception_handler(e):
464 """Examines exceptions and returns True if the update handler
465 should be retried.
466
467 @param e: the exception intercepted by the retry util.
468 """
469 return (isinstance(e, error.AutoservSSHTimeout) or
470 (isinstance(e, error.GenericHostRunError) and
471 hasattr(e, 'description') and
472 (re.search('ERROR_CODE=37', e.description) or
473 re.search('generic error .255.', e.description))))
474
475 try:
476 # Try the update twice (arg 2 is max_retry, not including the first
477 # call). Some exceptions may be caught by the retry handler.
478 retry_util.GenericRetry(exception_handler, 1,
479 self._base_update_handler_no_retry,
480 run_args)
481 except Exception as e:
482 message = err_msg_prefix + ': ' + str(e)
483 raise RootFSUpdateError(message)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800484
485
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800486 def _wait_for_update_service(self):
487 """Ensure that the update engine daemon is running, possibly
488 by waiting for it a bit in case the DUT just rebooted and the
489 service hasn't started yet.
490 """
491 def handler(e):
492 """Retry exception handler.
493
494 Assumes that the error is due to the update service not having
495 started yet.
496
497 @param e: the exception intercepted by the retry util.
498 """
499 if isinstance(e, error.AutoservRunError):
500 logging.debug('update service check exception: %s\n'
501 'retrying...', e)
502 return True
503 else:
504 return False
505
506 # Retry at most three times, every 5s.
507 status = retry_util.GenericRetry(handler, 3,
508 self.check_update_status,
509 sleep=5)
510
511 # Expect the update engine to be idle.
512 if status != UPDATER_IDLE:
Richard Barnette9d43e562018-06-05 17:20:10 +0000513 raise RootFSUpdateError(
514 'Update engine status is %s (%s was expected).'
515 % (status, UPDATER_IDLE))
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800516
517
Richard Barnette55d1af82018-05-22 23:40:14 +0000518 def _reset_update_engine(self):
519 """Resets the host to prepare for a clean update regardless of state."""
520 self._run('stop ui || true')
521 self._run('stop update-engine || true')
522 self._run('start update-engine')
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800523 self._wait_for_update_service()
524
Richard Barnette55d1af82018-05-22 23:40:14 +0000525
526 def _reset_stateful_partition(self):
527 """Clear any pending stateful update request."""
Richard Barnette18fd5842018-05-25 18:21:14 +0000528 self._run('%s --stateful_change=reset 2>&1'
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700529 % self._get_stateful_update_script())
Richard Barnette3ef29a82018-06-28 13:52:54 -0700530 self._run('rm -f %s' % _TARGET_VERSION)
531
532
533 def _set_target_version(self):
534 """Set the "target version" for the update."""
535 version_number = self.update_version.split('-')[1]
536 self._run('echo %s > %s' % (version_number, _TARGET_VERSION))
Richard Barnette55d1af82018-05-22 23:40:14 +0000537
538
539 def _revert_boot_partition(self):
540 """Revert the boot partition."""
541 part = self._rootdev('-s')
542 logging.warning('Reverting update; Boot partition will be %s', part)
543 return self._run('/postinst %s 2>&1' % part)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700544
545
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700546 def _verify_kernel_state(self):
547 """Verify that the next kernel to boot is correct for update.
548
549 This tests that the kernel state is correct for a successfully
550 downloaded and installed update. That is, the next kernel to
551 boot must be the currently inactive kernel.
552
553 @raise RootFSUpdateError if the DUT next kernel isn't the
554 expected next kernel.
555 """
556 inactive_kernel = self.get_kernel_state()[1]
557 next_kernel = self._get_next_kernel()
558 if next_kernel != inactive_kernel:
559 raise RootFSUpdateError(
560 'Update failed. The kernel for next boot is %s, '
561 'but %s was expected.'
562 % (next_kernel['name'], inactive_kernel['name']))
563 return inactive_kernel
564
565
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700566 def _verify_update_completed(self):
567 """Verifies that an update has completed.
568
Richard Barnette9d43e562018-06-05 17:20:10 +0000569 @raise RootFSUpdateError if the DUT doesn't indicate that
570 download is complete and the DUT is ready for reboot.
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700571 """
572 status = self.check_update_status()
573 if status != UPDATER_NEED_REBOOT:
Shuqian Zhaod9992722016-02-29 12:26:38 -0800574 error_msg = ''
575 if status == UPDATER_IDLE:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000576 error_msg = 'Update error: %s' % self._get_last_update_error()
Richard Barnette9d43e562018-06-05 17:20:10 +0000577 raise RootFSUpdateError(
578 'Update engine status is %s (%s was expected). %s'
579 % (status, UPDATER_NEED_REBOOT, error_msg))
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700580 return self._verify_kernel_state()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700581
582
Richard Barnette55d1af82018-05-22 23:40:14 +0000583 def trigger_update(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000584 """Triggers a background update."""
585 # If this function is called immediately after reboot (which it
586 # can be), there is no guarantee that the update engine is up
587 # and running yet, so wait for it.
Richard Barnette55d1af82018-05-22 23:40:14 +0000588 self._wait_for_update_service()
589
590 autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
591 (_UPDATER_BIN, self.update_url))
592 run_args = {'command': autoupdate_cmd}
593 err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
594 logging.info('Triggering update via: %s', autoupdate_cmd)
595 metric_fields = {'success': False}
596 try:
597 self._base_update_handler(run_args, err_prefix)
598 metric_fields['success'] = True
599 finally:
600 c = metrics.Counter('chromeos/autotest/autoupdater/trigger')
Richard Barnette621a8e42018-06-25 17:34:11 -0700601 metric_fields.update(_get_metric_fields(self.update_url))
Richard Barnette55d1af82018-05-22 23:40:14 +0000602 c.increment(fields=metric_fields)
603
604
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700605 def update_image(self):
Richard Barnette18fd5842018-05-25 18:21:14 +0000606 """Updates the device root FS and kernel and verifies success."""
Shuqian Zhaofe4d62e2016-06-23 14:46:45 -0700607 autoupdate_cmd = ('%s --update --omaha_url=%s' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000608 (_UPDATER_BIN, self.update_url))
David Haddock76a4c882017-12-13 18:50:09 -0800609 if not self.interactive:
610 autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
Shuqian Zhaod9992722016-02-29 12:26:38 -0800611 run_args = {'command': autoupdate_cmd, 'timeout': 3600}
612 err_prefix = ('Failed to install device image using payload at %s '
613 'on %s. ' % (self.update_url, self.host.hostname))
614 logging.info('Updating image via: %s', autoupdate_cmd)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700615 metric_fields = {'success': False}
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800616 try:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800617 self._base_update_handler(run_args, err_prefix)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700618 metric_fields['success'] = True
619 finally:
Allen Li1a5cc0a2017-06-20 14:08:59 -0700620 c = metrics.Counter('chromeos/autotest/autoupdater/update')
Richard Barnette621a8e42018-06-25 17:34:11 -0700621 metric_fields.update(_get_metric_fields(self.update_url))
Allen Li1a5cc0a2017-06-20 14:08:59 -0700622 c.increment(fields=metric_fields)
Richard Barnette4d211c92018-05-24 18:56:08 +0000623 return self._verify_update_completed()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700624
625
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700626 def _get_remote_script(self, script_name):
627 """Ensure that `script_name` is present on the DUT.
Chris Sosa5e4246b2012-05-22 18:05:22 -0700628
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700629 The given script (e.g. `stateful_update`) may be present in the
630 stateful partition under /usr/local/bin, or we may have to
631 download it from the devserver.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700632
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700633 Determine whether the script is present or must be downloaded
634 and download if necessary. Then, return a command fragment
635 sufficient to run the script from whereever it now lives on the
636 DUT.
Richard Barnette9d43e562018-06-05 17:20:10 +0000637
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700638 @param script_name The name of the script as expected in
639 /usr/local/bin and on the devserver.
640 @return A string with the command (minus arguments) that will
641 run the target script.
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700642 """
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700643 remote_script = '/usr/local/bin/%s' % script_name
644 if self.host.path_exists(remote_script):
645 return remote_script
646 remote_tmp_script = '/tmp/%s' % script_name
647 server_name = urlparse.urlparse(self.update_url)[1]
648 script_url = 'http://%s/static/%s' % (server_name, script_name)
Dana Goyette353d1d92019-06-27 10:43:59 -0700649 fetch_script = 'curl -Ss -o %s %s && head -1 %s' % (
650 remote_tmp_script, script_url, remote_tmp_script)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700651
Dana Goyette353d1d92019-06-27 10:43:59 -0700652 first_line = self._run(fetch_script).stdout.strip()
653
654 if first_line and first_line.startswith('#!'):
655 script_interpreter = first_line.lstrip('#!')
656 if script_interpreter:
657 return '%s %s' % (script_interpreter, remote_tmp_script)
658 return None
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700659
660 def _get_stateful_update_script(self):
661 """Returns a command to run the stateful update script.
662
663 Find `stateful_update` on the target or install it, as
664 necessary. If installation fails, raise an exception.
665
666 @raise StatefulUpdateError if the script can't be found or
667 installed.
668 @return A string that can be joined with arguments to run the
669 `stateful_update` command on the DUT.
670 """
671 script_command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
672 if not script_command:
673 raise StatefulUpdateError('Could not install %s on DUT'
Richard Barnette9d43e562018-06-05 17:20:10 +0000674 % _STATEFUL_UPDATE_SCRIPT)
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700675 return script_command
Chris Sosa5e4246b2012-05-22 18:05:22 -0700676
677
def rollback_rootfs(self, powerwash):
    """Trigger a rollback on the DUT and wait for it to complete.

    @param powerwash: If true, powerwash as part of rollback.

    @raise RootFSUpdateError if anything went wrong.
    """
    release_version = self.host.get_release_version()
    # `can_rollback` was introduced in M36 (build 5772).  The release
    # version has the form X.Y.Z; only the leading component is
    # needed to decide whether the check is available.
    try:
        build_number = int(release_version.split('.')[0])
    except ValueError:
        logging.error('Could not parse build number.')
        # Treat an unparseable version as too old for the check.
        build_number = 0

    if build_number >= 5772:
        can_rollback_cmd = '%s --can_rollback' % _UPDATER_BIN
        logging.info('Checking for rollback.')
        try:
            self._run(can_rollback_cmd)
        except error.AutoservRunError as e:
            details = (self.host.hostname, str(e))
            raise RootFSUpdateError(
                    "Rollback isn't possible on %s: %s" % details)

    # Powerwash is only suppressed when the caller did not ask for it.
    powerwash_suffix = '' if powerwash else ' --nopowerwash'
    rollback_cmd = ('%s --rollback --follow' % _UPDATER_BIN
                    + powerwash_suffix)

    logging.info('Performing rollback.')
    try:
        self._run(rollback_cmd)
    except error.AutoservRunError as e:
        details = (self.host.hostname, str(e))
        raise RootFSUpdateError('Rollback failed on %s: %s' % details)

    self._verify_update_completed()
715
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800716
def update_stateful(self, clobber=True):
    """Update the stateful partition of the DUT.

    @param clobber: If True, a clean stateful installation.

    @raise StatefulUpdateError if the update script fails to
            complete successfully.
    """
    logging.info('Updating stateful partition...')
    # The URL handed to the script is the update URL with 'update'
    # replaced by 'static'.
    payload_url = self.update_url.replace('update', 'static')
    script_command = self._get_stateful_update_script()

    # The stateful update must succeed so that the newly installed
    # host is testable after the update.
    pieces = [script_command, payload_url]
    if clobber:
        pieces.append('--stateful_change=clean')
    # Fold stderr into stdout for the remote command.
    pieces.append('2>&1')
    command_line = ' '.join(pieces)

    try:
        self._run(command_line, timeout=1200)
    except error.AutoservRunError:
        raise StatefulUpdateError(
                'Failed to perform stateful update on %s'
                % self.host.hostname)
Dale Curtis5c32c722011-05-04 19:24:23 -0700741
Chris Sosaa3ac2152012-05-23 22:23:13 -0700742
def verify_boot_expectations(self, expected_kernel, rollback_message):
    """Verifies that we fully booted given expected kernel state.

    This method both verifies that we booted using the correct kernel
    state and that the OS has marked the kernel as good.

    @param expected_kernel: kernel that we are verifying with,
        i.e. I expect to be booted onto partition 4 etc. See output of
        get_kernel_state.
    @param rollback_message: string to include in the exception
        message text if we booted with the wrong partition.

    @raise NewBuildUpdateError if any of the various checks fail.
    """
    # Figure out the newly active kernel.
    active_kernel = self.get_kernel_state()[0]

    # Check for rollback due to a bad build.
    if active_kernel != expected_kernel:

        # Kernel crash reports should be wiped between test runs, but
        # may persist from earlier parts of the test, or from problems
        # with provisioning.
        #
        # Kernel crash reports will NOT be present if the crash happened
        # before encrypted stateful is mounted.
        #
        # The reports live on the DUT, so they must be listed with a
        # remote command; a local glob would inspect the filesystem of
        # the machine running this code instead.  `ls` exits non-zero
        # when nothing matches, hence `ignore_status`.
        #
        # TODO(dgarrett): Integrate with server/crashcollect.py at some
        # point.
        crash_listing = self._run(
                'ls /var/spool/crash/kernel.*.kcrash 2>/dev/null',
                ignore_status=True).stdout
        kernel_crashes = crash_listing.split()
        if kernel_crashes:
            rollback_message += ': kernel_crash'
            logging.debug('Found %d kernel crash reports:',
                          len(kernel_crashes))
            # The crash names contain timestamps that may be useful:
            #    kernel.20131207.005945.0.kcrash
            for crash in kernel_crashes:
                logging.debug('  %s', os.path.basename(crash))

        # Print out some information to make it easier to debug
        # the rollback.
        logging.debug('Dumping partition table.')
        self._run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self._run('crossystem --all')
        raise NewBuildUpdateError(self.update_version, rollback_message)

    # Make sure chromeos-setgoodkernel runs: wait until the active
    # kernel has no remaining tries and is flagged successful.
    try:
        utils.poll_for_condition(
            lambda: (self._get_kernel_tries(active_kernel) == 0
                     and self._get_kernel_success(active_kernel)),
            exception=RootFSUpdateError(),
            timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except RootFSUpdateError:
        # The kernel was never marked good.  Use the state of the
        # `system-services` job to choose which failure to report.
        services_status = self._run('status system-services').stdout
        if services_status != 'system-services start/running\n':
            event = NewBuildUpdateError.CHROME_FAILURE
        else:
            event = NewBuildUpdateError.UPDATE_ENGINE_FAILURE
        raise NewBuildUpdateError(self.update_version, event)
Richard Barnette55d1af82018-05-22 23:40:14 +0000804
805
def _prepare_host(self):
    """Make sure the target DUT is working and ready for update.

    Initially, the target DUT's state is unknown.  The DUT is
    expected to be online, but we strive to be forgiving if Chrome
    and/or the update engine aren't fully functional.

    @raise HostUpdateError if the DUT is not up.
    """
    # The DUT must be reachable before anything else can be done.
    if not self.host.is_up():
        raise HostUpdateError(self.host.hostname,
                              HostUpdateError.DUT_DOWN)

    # Summary of the remaining work, and the rationale:
    #  * Reset the stateful partition, then reboot, because a reboot
    #    is a good way to clear out problems.
    #  * Touch the PROVISION_FAILED file, to allow repair to detect
    #    failure later.
    #  * Run the hook for host class specific preparation.
    #  * Reset the update engine, because if Chrome failed to start
    #    properly, the status of the `update-engine` job will be
    #    uncertain.
    self._reset_stateful_partition()
    self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

    marker_cmd = 'touch %s' % PROVISION_FAILED
    self._run(marker_cmd)

    self.host.prepare_for_update()
    self._reset_update_engine()

    current_version = self.host.get_release_version()
    logging.info('Updating from version %s to %s.',
                 current_version, self.update_version)
834
835
def _verify_devserver(self):
    """Check that our chosen devserver is still working.

    Two failure modes are reported separately: the health check
    itself raising (devserver unreachable or otherwise broken), and
    the health check completing but reporting an unhealthy server.

    @raise DevServerError if the devserver fails any sanity check.
    """
    server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
    # Only the health query is guarded.  The "not healthy" error is
    # raised outside the `try` so that it is not caught and replaced
    # by the "not up" error.
    try:
        healthy = dev_server.ImageServer.devserver_healthy(server)
    except Exception:
        # Keep the underlying cause in the logs; the raised error
        # carries only the summary message.
        logging.exception('Health check of devserver %s failed.', server)
        raise DevServerError(
                server, 'Devserver is not up and available')
    if not healthy:
        raise DevServerError(
                server, 'Devserver is not healthy')
Richard Barnette14ee84c2018-05-18 20:23:42 +0000849
850
def _install_via_update_engine(self):
    """Install an update using the production AU flow.

    This uses the standard AU flow and the `stateful_update` script
    to download and install a root FS, kernel and stateful
    filesystem content.

    @return The kernel expected to be booted next.
    """
    logging.info('Installing image using update_engine.')
    # Order matters: root FS and kernel first, then stateful content,
    # and only then record the target version.
    next_kernel = self.update_image()
    self.update_stateful()
    self._set_target_version()
    return next_kernel
865
866
def _quick_provision_with_gs_cache(self, provision_command, devserver_name,
                                   image_name):
    """Run quick_provision using the GsCache server.

    @param provision_command: The path of quick_provision command.
    @param devserver_name: The devserver name and port (optional).
    @param image_name: The image to be installed.
    """
    logging.info('Try quick provision with gs_cache.')
    # If enabled, the GsCache server listens on a different port of
    # the same devserver, so only the port part of the name changes.
    gs_cache_server = devserver_name.replace(DEVSERVER_PORT, GS_CACHE_PORT)
    gs_cache_url = ('http://%s/download/chromeos-image-archive'
                    % gs_cache_server)

    # Check whether the GsCache server is enabled: a failing `curl`
    # aborts this attempt before the provision command is run.
    probe_command = 'curl -s -o /dev/null %s' % gs_cache_url
    self._run(probe_command)

    provision_args = (provision_command, image_name, gs_cache_url)
    self._run('%s --noreboot %s %s' % provision_args)

    counter = metrics.Counter(_metric_name('quick_provision'))
    counter.increment(
            fields={'devserver': devserver_name, 'gs_cache': True})
890
891
def _quick_provision_with_devserver(self, provision_command,
                                    devserver_name, image_name):
    """Run quick_provision using the legacy devserver.

    @param provision_command: The path of quick_provision command.
    @param devserver_name: The devserver name and port (optional).
    @param image_name: The image to be installed.
    """
    logging.info('Try quick provision with devserver.')
    # The image is fetched from the devserver's `static` tree.
    static_url = 'http://%s/static' % devserver_name
    provision_args = (provision_command, image_name, static_url)
    self._run('%s --noreboot %s %s' % provision_args)

    counter = metrics.Counter(_metric_name('quick_provision'))
    counter.increment(
            fields={'devserver': devserver_name, 'gs_cache': False})
907
908
def _install_via_quick_provision(self):
    """Install an update using the `quick-provision` script.

    This uses the `quick-provision` script to download and install
    a root FS, kernel and stateful filesystem content.  The GsCache
    server is tried first, then the legacy devserver.

    @return The kernel expected to be booted next, or `None` if
            quick-provision is disabled, unavailable or failed, in
            which case the caller should fall back to another
            install method.
    """
    if not self._use_quick_provision:
        return None
    image_name = url_to_image_name(self.update_url)
    logging.info('Installing image using quick-provision.')
    provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
    if not provision_command:
        # No runnable script: without this guard the literal command
        # "None --noreboot ..." would be executed on the DUT.  Nothing
        # has been modified yet, so no cleanup is needed.
        logging.warning('quick-provision script is not available; '
                        'will fall back to update_engine.')
        return None
    server_name = urlparse.urlparse(self.update_url)[1]
    try:
        try:
            self._quick_provision_with_gs_cache(provision_command,
                                                server_name, image_name)
        except Exception:
            # Record why GsCache was rejected before retrying, rather
            # than discarding the failure silently.
            logging.info('Quick provision with gs_cache failed; '
                         'retrying with devserver.', exc_info=True)
            self._quick_provision_with_devserver(provision_command,
                                                 server_name, image_name)

        self._set_target_version()
        return self._verify_kernel_state()
    except Exception:
        # N.B.  We handle only `Exception` here.  Non-Exception
        # classes (such as KeyboardInterrupt) are handled by our
        # caller.
        logging.exception('quick-provision script failed; '
                          'will fall back to update_engine.')
        # Undo whatever a partial install may have changed so that the
        # fallback starts from a clean state.
        self._revert_boot_partition()
        self._reset_stateful_partition()
        self._reset_update_engine()
        return None
943
944
def _install_update(self):
    """Install the requested image on the DUT, but don't start it.

    This downloads and installs a root FS, kernel and stateful
    filesystem content.  This does not reboot the DUT, so the update
    is merely pending when the method returns.

    @return The kernel expected to be booted next.
    """
    logging.info('Installing image at %s onto %s',
                 self.update_url, self.host.hostname)
    try:
        # Prefer quick-provision; a false result means it was not
        # used or did not succeed, so use update_engine instead.
        expected_kernel = self._install_via_quick_provision()
        if not expected_kernel:
            expected_kernel = self._install_via_update_engine()
        return expected_kernel
    except:
        # N.B. This handling code includes non-Exception classes such
        # as KeyboardInterrupt.  We need to clean up, but we also must
        # re-raise.
        self._revert_boot_partition()
        self._reset_stateful_partition()
        self._reset_update_engine()
        # Collect update engine logs in the event of failure.
        job = self.host.job
        if job:
            logging.info('Collecting update engine logs due to failure...')
            self.host.get_file(
                    _UPDATER_LOGS, job.sysinfo.sysinfodir,
                    preserve_perm=False)
        _list_image_dir_contents(self.update_url)
        raise
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200974
975
def _complete_update(self, expected_kernel):
    """Finish the update, and confirm that it succeeded.

    Initial condition is that the target build has been downloaded
    and installed on the DUT, but has not yet been booted.  This
    function is responsible for rebooting the DUT, and checking that
    the new build is running successfully.

    @param expected_kernel: kernel expected to be active after reboot.
    """
    # Regarding the 'crossystem' command below:  In some cases,
    # the update flow puts the TPM into a state such that it
    # fails verification.  We don't know why.  However, this
    # call papers over the problem by clearing the TPM during
    # the reboot.
    #
    # We ignore failures from 'crossystem'.  Although failure
    # here is unexpected, and could signal a bug, the point of
    # the exercise is to paper over problems; allowing this to
    # fail would defeat the purpose.
    clear_tpm_cmd = 'crossystem clear_tpm_owner_request=1'
    self._run(clear_tpm_cmd, ignore_status=True)
    self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    autoreboot_template = ('FILE="%s" ; [ -f "$FILE" ] || '
                           '( touch "$FILE" ; start autoreboot )')
    self._run(autoreboot_template % _LAB_MACHINE_FILE)

    rollback_event = NewBuildUpdateError.ROLLBACK_FAILURE
    self.verify_boot_expectations(expected_kernel, rollback_event)

    logging.debug('Cleaning up old autotest directories.')
    try:
        installed_autodir = autotest.Autotest.get_installed_autodir(
                self.host)
        self._run('rm -rf ' + installed_autodir)
    except autotest.AutodirNotFoundError:
        # Nothing to remove.
        logging.debug('No autotest installed directory found.')
1017
1018
def run_update(self):
    """Perform a full update of a DUT in the test lab.

    This downloads and installs the root FS and stateful partition
    content needed for the update specified in `self.host` and
    `self.update_url`.  The update is performed according to the
    requirements for provisioning a DUT for testing the requested
    build.

    The work happens in three phases (prepare the host, install the
    image, complete the update).  In each phase an error that is
    already an `_AttributedUpdateError` propagates unchanged, and any
    other `Exception` is logged and converted to the error class for
    that phase.

    @raise HostUpdateError if preparing the host fails.
    @raise ImageInstallError if download and install fails.
    @raise NewBuildUpdateError if the new build fails after update.
    @returns A tuple of the form `(image_name, attributes)`, where
        `image_name` is the name of the image installed, and
        `attributes` is new attributes to be applied to the DUT.
    """
    server_name = dev_server.get_resolved_hostname(self.update_url)
    install_counter = metrics.Counter(_metric_name('install'))
    install_counter.increment(fields={'devserver': server_name})

    self._verify_devserver()

    # Phase 1: get the DUT into a known state.
    try:
        self._prepare_host()
    except _AttributedUpdateError:
        raise
    except Exception as e:
        logging.exception('Failure preparing host prior to update.')
        raise HostUpdateError(self.host.hostname, str(e))

    # Phase 2: download and install, leaving the update pending.
    try:
        expected_kernel = self._install_update()
    except _AttributedUpdateError:
        raise
    except Exception as e:
        logging.exception('Failure during download and install.')
        server_name = dev_server.get_resolved_hostname(self.update_url)
        raise ImageInstallError(self.host.hostname, server_name, str(e))

    # Phase 3: reboot into the new build and verify it.
    try:
        self._complete_update(expected_kernel)
    except _AttributedUpdateError:
        raise
    except Exception as e:
        logging.exception('Failure from build after update.')
        raise NewBuildUpdateError(self.update_version, str(e))

    image_name = url_to_image_name(self.update_url)
    # update_url is different from devserver url needed to stage autotest
    # packages, therefore, resolve a new devserver url here.
    package_server = dev_server.ImageServer.resolve(
            image_name, self.host.hostname)
    devserver_url = package_server.url()
    repo_url = tools.get_package_url(devserver_url, image_name)
    attributes = {ds_constants.JOB_REPO_URL: repo_url}
    return image_name, attributes