blob: a343b70584fc58b9b133d398ce431fc51ae53711 [file] [log] [blame]
Chris Sosa5e4246b2012-05-22 18:05:22 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Sean O'Connor5346e4e2010-08-12 18:49:24 +02002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Don Garrett56b1cc82013-12-06 17:49:20 -08005import glob
Sean O'Connor5346e4e2010-08-12 18:49:24 +02006import logging
Dale Curtis5c32c722011-05-04 19:24:23 -07007import os
Sean O'Connor5346e4e2010-08-12 18:49:24 +02008import re
Richard Barnette621a8e42018-06-25 17:34:11 -07009import time
Prashanth B32baa9b2014-03-13 13:23:01 -070010import urllib2
Richard Barnette0beb14b2018-05-15 18:07:52 +000011import urlparse
Sean O'Connor5346e4e2010-08-12 18:49:24 +020012
Chris Sosa65425082013-10-16 13:26:22 -070013from autotest_lib.client.bin import utils
Dale Curtis5c32c722011-05-04 19:24:23 -070014from autotest_lib.client.common_lib import error, global_config
Prashanth B32baa9b2014-03-13 13:23:01 -070015from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette0beb14b2018-05-15 18:07:52 +000016from autotest_lib.server import autotest
Shelley Chen61d28982016-10-28 09:40:20 -070017from autotest_lib.server import utils as server_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000018from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
19from autotest_lib.server.cros.dynamic_suite import tools
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -080020from chromite.lib import retry_util
Dan Shif3a35f72016-01-25 11:18:14 -080021
Shelley Chen16b8df32016-10-27 16:24:21 -070022try:
23 from chromite.lib import metrics
Dan Shi5e2efb72017-02-07 11:40:23 -080024except ImportError:
25 metrics = utils.metrics_mock
Sean O'Connor5346e4e2010-08-12 18:49:24 +020026
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070027
Richard Barnette621a8e42018-06-25 17:34:11 -070028def _metric_name(base_name):
29 return 'chromeos/autotest/provision/' + base_name
30
31
# Local stateful update path is relative to the CrOS source directory.
# NOTE(review): the sentence above looks stale; no path is defined here.
# Update engine states, as reported in the CURRENT_OP field of
# `update_engine_client -status` (see `check_update_status()`).
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


# Names of helper scripts, as found under /usr/local/bin on the DUT or
# under /static on the devserver (see `_get_remote_script()`).
_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
_QUICK_PROVISION_SCRIPT = 'quick-provision'

# Path of the update engine client on the DUT, and the DUT log files
# associated with updates.
_UPDATER_BIN = '/usr/bin/update_engine_client'
_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

# Descriptions of the two boot slots.  'kernel' is the partition number
# passed to `cgpt show -i`; 'root' is the partition number compared
# against the trailing digits of `rootdev -s`.
_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}

# Time to wait for new kernel to be marked successful after
# auto update.  (Presumably in seconds -- TODO confirm at the use site.)
_KERNEL_UPDATE_TIMEOUT = 120


# PROVISION_FAILED - A flag file to indicate provision failures.  The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater._prepare_host()`).  The file's location in
# stateful means that on successful update it will be removed.  Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
PROVISION_FAILED = '/var/tmp/provision_failed'


# A flag file used to enable special handling in lab DUTs.  Some
# parts of the system in Chromium OS test images will behave in ways
# convenient to the test lab when this file is present.  Generally,
# we create this immediately after any update completes.
_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'


# _TARGET_VERSION - A file containing the new version to which we plan
# to update.  This file is used by the CrOS shutdown code to detect and
# handle certain version downgrade cases.  Specifically:  Downgrading
# may trigger an unwanted powerwash in the target build when the
# following conditions are met:
#  * Source build is a v4.4 kernel with R69-10756.0.0 or later.
#  * Target build predates the R69-10756.0.0 cutoff.
# When this file is present and indicates a downgrade, the OS shutdown
# code on the DUT knows how to prevent the powerwash.
_TARGET_VERSION = '/run/update_target_version'


# _REBOOT_FAILURE_MESSAGE - This is the standard message text returned
# when the Host.reboot() method fails.  The source of this text comes
# from `wait_for_restart()` in client/common_lib/hosts/base_classes.py.

_REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'
89
90
class RootFSUpdateError(error.TestFail):
    """Raised when the RootFS fails to update.

    Within this module it signals failures to trigger, install, verify
    or roll back a root filesystem/kernel update on the DUT.
    """
Chris Sosa77556d82012-04-05 15:23:14 -070093
94
class StatefulUpdateError(error.TestFail):
    """Raised when the stateful partition fails to update.

    This includes being unable to find or install the stateful update
    script on the DUT.
    """
Chris Sosa77556d82012-04-05 15:23:14 -070097
98
class _AttributedUpdateError(error.TestFail):
    """Base class for update failures that name a likely culprit.

    Subclasses are expected to provide two class attributes:
      * `_SUMMARY` - short text naming the category of failure.
      * `_CLASSIFIERS` - a list of `(pattern, classification)` pairs;
        each pattern is matched with `re.match()` against the failure
        message to refine the summary.
    """

    def __init__(self, attribution, msg):
        """Initialize the exception.

        @param attribution: Text describing what the failure is
                            attributed to; used as the message prefix.
        @param msg: Text describing the failure itself.
        """
        full_text = '%s: %s' % (attribution, msg)
        super(_AttributedUpdateError, self).__init__(full_text)
        # Keep the bare message; classification matches against it, not
        # against the attributed text.
        self._message = msg

    def _classify(self):
        """Return the classification of the first matching pattern.

        @return The classification paired with the first pattern in
                `_CLASSIFIERS` matching the message, or `None` if no
                pattern matches.
        """
        candidates = (label for pattern, label in self._CLASSIFIERS
                      if re.match(pattern, self._message))
        return next(candidates, None)

    @property
    def failure_summary(self):
        """Summarize this error for metrics reporting."""
        label = self._classify()
        if not label:
            return self._SUMMARY
        return '%s: %s' % (self._SUMMARY, label)
Richard Barnette9d43e562018-06-05 17:20:10 +0000121
122
class HostUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the DUT.

    Raise this when the most likely cause of failure is a condition
    that existed on the DUT before the update began, such as a hardware
    problem or a bug in the software already on the DUT.
    """

    # Standard message text for a DUT that cannot be reached.
    DUT_DOWN = 'No answer to ssh'

    _SUMMARY = 'DUT failed prior to update'
    # Each pattern is matched against the start of the failure message.
    _CLASSIFIERS = [
        (DUT_DOWN, DUT_DOWN),
        (_REBOOT_FAILURE_MESSAGE, 'Reboot failed'),
    ]

    def __init__(self, hostname, msg):
        """Initialize the exception.

        @param hostname: Name of the DUT the failure is attributed to.
        @param msg: Text describing the failure.
        """
        attribution = 'Error on %s prior to update' % hostname
        super(HostUpdateError, self).__init__(attribution, msg)
142
143
class DevServerError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the devserver.

    Raise this when the most likely cause of failure is the devserver
    that serves the target image for the update.
    """

    _SUMMARY = 'Devserver failed prior to update'
    # No finer-grained classifications are defined for this failure.
    _CLASSIFIERS = []

    def __init__(self, devserver, msg):
        """Initialize the exception.

        @param devserver: Name of the devserver the failure is
                          attributed to.
        @param msg: Text describing the failure.
        """
        attribution = 'Devserver error on %s' % devserver
        super(DevServerError, self).__init__(attribution, msg)
157
158
class ImageInstallError(_AttributedUpdateError):
    """Failure updating a DUT when installing from the devserver.

    Raise this when the target DUT fails to download and install the
    target image from the devserver, and the fault could lie with
    either the devserver or the DUT.
    """

    _SUMMARY = 'Image failed to download and install'
    # No finer-grained classifications are defined for this failure.
    _CLASSIFIERS = []

    def __init__(self, hostname, devserver, msg):
        """Initialize the exception.

        @param hostname: Name of the DUT being updated.
        @param devserver: Name of the devserver serving the image.
        @param msg: Text describing the failure.
        """
        attribution = ('Download and install failed from %s onto %s'
                       % (devserver, hostname))
        super(ImageInstallError, self).__init__(attribution, msg)
174
175
class NewBuildUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the target build.

    Raise this when updating to a new build fails and the most likely
    culprit is a bug in the newly installed target build.
    """

    # Standard message texts for failures blamed on the new build.
    CHROME_FAILURE = 'Chrome failed to reach login screen'
    UPDATE_ENGINE_FAILURE = ('update-engine failed to call '
                             'chromeos-setgoodkernel')
    ROLLBACK_FAILURE = 'System rolled back to previous build'

    _SUMMARY = 'New build failed'
    # Each pattern is matched against the start of the failure message.
    _CLASSIFIERS = [
        (CHROME_FAILURE, 'Chrome did not start'),
        (UPDATE_ENGINE_FAILURE, 'update-engine did not start'),
        (ROLLBACK_FAILURE, ROLLBACK_FAILURE),
    ]

    def __init__(self, update_version, msg):
        """Initialize the exception.

        @param update_version: Version of the build being blamed.
        @param msg: Text describing the failure.
        """
        attribution = 'Failure in build %s' % update_version
        super(NewBuildUpdateError, self).__init__(attribution, msg)

    @property
    def failure_summary(self):
        #pylint: disable=missing-docstring
        # NOTE(review): this override bypasses the inherited
        # classification, so `_SUMMARY` and `_CLASSIFIERS` above never
        # influence the summary.  Confirm whether that is intended.
        return 'Build failed to work after installing'
204
Richard Barnette9d43e562018-06-05 17:20:10 +0000205
Richard Barnette3e8b2282018-05-15 20:42:20 +0000206def _url_to_version(update_url):
Dan Shi0f466e82013-02-22 15:44:58 -0800207 """Return the version based on update_url.
208
209 @param update_url: url to the image to update to.
210
211 """
Dale Curtisddfdb942011-07-14 13:59:24 -0700212 # The Chrome OS version is generally the last element in the URL. The only
213 # exception is delta update URLs, which are rooted under the version; e.g.,
214 # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
215 # strip off the au section of the path before reading the version.
Dan Shi5002cfc2013-04-29 10:45:05 -0700216 return re.sub('/au/.*', '',
217 urlparse.urlparse(update_url).path).split('/')[-1].strip()
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200218
219
def url_to_image_name(update_url):
    """Return the image name based on update_url.

    The image name is the last two elements of the URL path.  For
    example, given
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    the result is lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    path_elements = urlparse.urlparse(update_url).path.split('/')
    return '/'.join(path_elements[-2:])
232
233
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The url is matched against the `image_url_pattern` value in the
    CROS section of the global config.  Each `%s` placeholder in that
    pattern captures a run of non-whitespace characters.

    @param update_url: The url for update.
                       Eg: http://devserver:port/update/build.

    @return: A tuple of the captured groups; with the expected
             two-placeholder pattern this is (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
                        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    # Raw string: `\S` is a regex class, not a Python string escape.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
257
258
Richard Barnette3e8b2282018-05-15 20:42:20 +0000259def _list_image_dir_contents(update_url):
Prashanth B32baa9b2014-03-13 13:23:01 -0700260 """Lists the contents of the devserver for a given build/update_url.
261
262 @param update_url: An update url. Eg: http://devserver:port/update/build.
263 """
264 if not update_url:
265 logging.warning('Need update_url to list contents of the devserver.')
266 return
267 error_msg = 'Cannot check contents of devserver, update url %s' % update_url
268 try:
269 devserver_url, build = _get_devserver_build_from_update_url(update_url)
270 except ValueError as e:
271 logging.warning('%s: %s', error_msg, e)
272 return
273 devserver = dev_server.ImageServer(devserver_url)
274 try:
275 devserver.list_image_dir(build)
276 # The devserver will retry on URLError to avoid flaky connections, but will
277 # eventually raise the URLError if it persists. All HTTPErrors get
278 # converted to DevServerExceptions.
279 except (dev_server.DevServerException, urllib2.URLError) as e:
280 logging.warning('%s: %s', error_msg, e)
281
282
def _get_metric_fields(update_url):
    """Return a dict of metric fields.

    This is used for sending autoupdate metrics for the given update URL.
    If the build name in the URL can't be parsed, the board, build type
    and milestone fields are reported as empty strings.

    @param update_url  Metrics fields will be calculated from this URL.
    @return A dict with the keys 'dev_server', 'board', 'build_type' and
            'milestone'.
    """
    image_name = url_to_image_name(update_url)
    try:
        board, build_type, milestone, _ = server_utils.ParseBuildName(
            image_name)
    except server_utils.ParseBuildNameException:
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', image_name)
        board = build_type = milestone = ''
    return dict(
        dev_server=dev_server.get_resolved_hostname(update_url),
        board=board,
        build_type=build_type,
        milestone=milestone,
    )
303 }
304
305
def _emit_provision_metrics(name_prefix, build_name, failure_reason,
                            duration, fields):
    """Emit build, failure reason and duration metrics for one update.

    @param name_prefix: Prefix distinguishing this family of metrics.
    @param build_name: Name of the build; reported as a string metric.
    @param failure_reason: Failure description; reported only when
                           truthy.
    @param duration: Duration to add to the duration distribution.
    @param fields: Dict of metric fields attached to every metric.
    """
    def full_name(suffix):
        """Return the complete metric name for `suffix`."""
        return _metric_name(name_prefix + suffix)

    # reset_after=True is required for String gauges events to ensure that
    # the metrics are not repeatedly emitted until the server restarts.
    build_metric = metrics.String(full_name('_build_by_devserver_dut'),
                                  reset_after=True)
    build_metric.set(build_name, fields=fields)
    if failure_reason:
        failure_metric = metrics.String(
            full_name('_failure_reason_by_devserver_dut'),
            reset_after=True)
        failure_metric.set(failure_reason, fields=fields)
    duration_metric = metrics.SecondsDistribution(
        full_name('_duration_by_devserver_dut'))
    duration_metric.add(duration, fields=fields)
318 duration, fields=fields)
319
320
def _emit_updater_metrics(update_url, dut_host_name,
                          failure_reason, duration):
    """Send metrics for one provision request.

    @param update_url: URL of the update; the board, build type,
                       devserver and build name are derived from it.
    @param dut_host_name: Host name of the DUT being provisioned.
    @param failure_reason: Failure description; a falsy value is
                           reported as success.
    @param duration: Duration of the provision request.
    """
    # The following is high cardinality, but sparse.
    # Each DUT is of a single board type, and likely build type.
    #
    # TODO(jrbarnette) The devserver-triggered provisioning code
    # included retries in certain cases.  For that reason, the metrics
    # distinguished 'provision' metrics which summarized across all
    # retries, and 'auto_update' which summarized an individual update
    # attempt.  ChromiumOSUpdater doesn't do retries, so we just report
    # the same information twice.
    image_fields = _get_metric_fields(update_url)
    fields = dict(
        board=image_fields['board'],
        build_type=image_fields['build_type'],
        dut_host_name=dut_host_name,
        dev_server=image_fields['dev_server'],
        success=not failure_reason,
    )
    build_name = url_to_image_name(update_url)
    _emit_provision_metrics('auto_update', build_name, failure_reason,
                            duration, fields)
    # Only the 'provision' metrics carry an attempt number.
    fields['attempt'] = 1
    _emit_provision_metrics('provision', build_name, failure_reason,
                            duration, fields)
Richard Barnette621a8e42018-06-25 17:34:11 -0700347
348
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700349# TODO(garnold) This implements shared updater functionality needed for
350# supporting the autoupdate_EndToEnd server-side test. We should probably
351# migrate more of the existing ChromiumOSUpdater functionality to it as we
352# expand non-CrOS support in other tests.
Richard Barnette3e8b2282018-05-15 20:42:20 +0000353class ChromiumOSUpdater(object):
354 """Chromium OS specific DUT update functionality."""
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700355
def __init__(self, update_url, host=None, interactive=True):
    """Initializes the object.

    The target version is derived from `update_url` and stored as
    `self.update_version`.

    @param update_url: The URL we want the update to use.
    @param host: A client.common_lib.hosts.Host implementation.
    @param interactive: Bool whether we are doing an interactive update.
    """
    target_version = _url_to_version(update_url)
    self.update_url = update_url
    self.host = host
    self.interactive = interactive
    self.update_version = target_version
367
368
369 def _run(self, cmd, *args, **kwargs):
370 """Abbreviated form of self.host.run(...)"""
371 return self.host.run(cmd, *args, **kwargs)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700372
373
def check_update_status(self):
    """Returns the current update engine state.

    Runs `update_engine_client -status' on the DUT and parses the line
    reporting the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".

    @return The text following the last '=' on the CURRENT_OP line.
    """
    status_cmd = '%s -status | grep CURRENT_OP' % _UPDATER_BIN
    result = self.host.run(command=status_cmd)
    current_op_line = result.stdout.strip()
    return current_op_line.split('=')[-1]
383
384
Richard Barnette55d1af82018-05-22 23:40:14 +0000385 def _rootdev(self, options=''):
386 """Returns the stripped output of rootdev <options>.
387
388 @param options: options to run rootdev.
389
390 """
391 return self._run('rootdev %s' % options).stdout.strip()
392
393
def get_kernel_state(self):
    """Returns the (<active>, <inactive>) kernel state as a pair.

    The active root partition number is taken from the trailing digits
    of the `rootdev -s` output.

    @return A pair of kernel descriptions, `_KERNEL_A` and `_KERNEL_B`,
            ordered as (active, inactive).
    @raise RootFSUpdateError if the DUT reports a root partition
            number that isn't one of the known valid values, or if no
            partition number can be found in the `rootdev` output.
    """
    rootdev_output = self._rootdev('-s')
    partition_match = re.search(r'\d+\Z', rootdev_output)
    if partition_match is None:
        # Report the documented error type rather than an IndexError.
        raise RootFSUpdateError(
                'Unable to find a root partition number in: %r'
                % rootdev_output)
    active_root = int(partition_match.group())
    if active_root == _KERNEL_A['root']:
        return _KERNEL_A, _KERNEL_B
    elif active_root == _KERNEL_B['root']:
        return _KERNEL_B, _KERNEL_A
    else:
        raise RootFSUpdateError(
                'Encountered unknown root partition: %s' % active_root)
Richard Barnette55d1af82018-05-22 23:40:14 +0000408
409
Richard Barnette18fd5842018-05-25 18:21:14 +0000410 def _cgpt(self, flag, kernel):
411 """Return numeric cgpt value for the specified flag, kernel, device."""
412 return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
413 kernel['kernel'], flag)).stdout.strip())
Richard Barnette55d1af82018-05-22 23:40:14 +0000414
415
def _get_next_kernel(self):
    """Return the kernel that has priority for the next boot.

    @return `_KERNEL_A` if its cgpt priority is strictly greater than
            that of `_KERNEL_B`; otherwise `_KERNEL_B`.
    """
    priority_a = self._cgpt('-P', _KERNEL_A)
    priority_b = self._cgpt('-P', _KERNEL_B)
    return _KERNEL_A if priority_a > priority_b else _KERNEL_B
423 return _KERNEL_B
424
425
426 def _get_kernel_success(self, kernel):
427 """Return boolean success flag for the specified kernel.
428
429 @param kernel: information of the given kernel, either _KERNEL_A
430 or _KERNEL_B.
431 """
432 return self._cgpt('-S', kernel) != 0
433
434
435 def _get_kernel_tries(self, kernel):
436 """Return tries count for the specified kernel.
437
438 @param kernel: information of the given kernel, either _KERNEL_A
439 or _KERNEL_B.
440 """
441 return self._cgpt('-T', kernel)
442
443
def _get_last_update_error(self):
    """Get the last autoupdate error code.

    @return The output of `update_engine_client --last_attempt_error`
            with its lines joined by ', '.
    """
    query_cmd = '%s --last_attempt_error' % _UPDATER_BIN
    output = self._run(query_cmd).stdout.strip()
    return ', '.join(output.split('\n'))
Shuqian Zhaod9992722016-02-29 12:26:38 -0800449
450
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800451 def _base_update_handler_no_retry(self, run_args):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800452 """Base function to handle a remote update ssh call.
453
454 @param run_args: Dictionary of args passed to ssh_host.run function.
Shuqian Zhaod9992722016-02-29 12:26:38 -0800455
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800456 @throws: intercepts and re-throws all exceptions
Shuqian Zhaod9992722016-02-29 12:26:38 -0800457 """
Shuqian Zhaod9992722016-02-29 12:26:38 -0800458 try:
459 self.host.run(**run_args)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800460 except Exception as e:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800461 logging.debug('exception in update handler: %s', e)
462 raise e
Shuqian Zhaod9992722016-02-29 12:26:38 -0800463
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800464
def _base_update_handler(self, run_args, err_msg_prefix=None):
    """Handle a remote update ssh call, possibly with retries.

    @param run_args: Dictionary of args passed to ssh_host.run function.
    @param err_msg_prefix: Prefix of the exception error message.  When
                           omitted, the message is the text of the
                           underlying exception alone.

    @raise RootFSUpdateError if the call still fails after any retry.
    """
    def exception_handler(e):
        """Examines exceptions and returns True if the update handler
        should be retried.

        @param e: the exception intercepted by the retry util.
        """
        return (isinstance(e, error.AutoservSSHTimeout) or
                (isinstance(e, error.GenericHostRunError) and
                 hasattr(e, 'description') and
                 (re.search('ERROR_CODE=37', e.description) or
                  re.search('generic error .255.', e.description))))

    try:
        # Try the update twice (arg 2 is max_retry, not including the first
        # call).  Some exceptions may be caught by the retry handler.
        retry_util.GenericRetry(exception_handler, 1,
                                self._base_update_handler_no_retry,
                                run_args)
    except Exception as e:
        # Without this guard the default prefix of None would raise a
        # TypeError here and hide the real failure.
        if err_msg_prefix is None:
            message = str(e)
        else:
            message = err_msg_prefix + ': ' + str(e)
        raise RootFSUpdateError(message)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800492
493
def _wait_for_update_service(self):
    """Ensure that the update engine daemon is running and idle.

    The status check is retried for a while, in case the DUT has just
    rebooted and the service hasn't started yet.

    @raise RootFSUpdateError if the update engine reports any state
            other than idle.
    """
    def should_retry(exc):
        """Decide whether a failed status check should be retried.

        Assumes that a run error is due to the update service not
        having started yet.

        @param exc: the exception intercepted by the retry util.
        """
        if not isinstance(exc, error.AutoservRunError):
            return False
        logging.debug('update service check exception: %s\n'
                      'retrying...', exc)
        return True

    # Retry at most three times, every 5s.
    status = retry_util.GenericRetry(should_retry, 3,
                                     self.check_update_status,
                                     sleep=5)

    # Expect the update engine to be idle.
    if status == UPDATER_IDLE:
        return
    raise RootFSUpdateError(
            'Update engine status is %s (%s was expected).'
            % (status, UPDATER_IDLE))
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800524
525
Richard Barnette55d1af82018-05-22 23:40:14 +0000526 def _reset_update_engine(self):
527 """Resets the host to prepare for a clean update regardless of state."""
528 self._run('stop ui || true')
529 self._run('stop update-engine || true')
530 self._run('start update-engine')
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800531 self._wait_for_update_service()
532
Richard Barnette55d1af82018-05-22 23:40:14 +0000533
def _reset_stateful_partition(self):
    """Clear any pending stateful update request.

    Also removes the target version file from the DUT.
    """
    reset_cmd = ('%s --stateful_change=reset 2>&1'
                 % self._get_stateful_update_script())
    self._run(reset_cmd)
    remove_cmd = 'rm -f %s' % _TARGET_VERSION
    self._run(remove_cmd)
539
540
def _set_target_version(self):
    """Set the "target version" for the update.

    Writes the second '-'-separated field of `self.update_version` to
    the target version file on the DUT.
    """
    version_fields = self.update_version.split('-')
    write_cmd = 'echo %s > %s' % (version_fields[1], _TARGET_VERSION)
    self._run(write_cmd)
Richard Barnette55d1af82018-05-22 23:40:14 +0000545
546
547 def _revert_boot_partition(self):
548 """Revert the boot partition."""
549 part = self._rootdev('-s')
550 logging.warning('Reverting update; Boot partition will be %s', part)
551 return self._run('/postinst %s 2>&1' % part)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700552
553
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700554 def _verify_kernel_state(self):
555 """Verify that the next kernel to boot is correct for update.
556
557 This tests that the kernel state is correct for a successfully
558 downloaded and installed update. That is, the next kernel to
559 boot must be the currently inactive kernel.
560
561 @raise RootFSUpdateError if the DUT next kernel isn't the
562 expected next kernel.
563 """
564 inactive_kernel = self.get_kernel_state()[1]
565 next_kernel = self._get_next_kernel()
566 if next_kernel != inactive_kernel:
567 raise RootFSUpdateError(
568 'Update failed. The kernel for next boot is %s, '
569 'but %s was expected.'
570 % (next_kernel['name'], inactive_kernel['name']))
571 return inactive_kernel
572
573
def _verify_update_completed(self):
    """Verifies that an update has completed.

    @return The result of `_verify_kernel_state()`, i.e. the currently
            inactive kernel.
    @raise RootFSUpdateError if the DUT doesn't indicate that
            download is complete and the DUT is ready for reboot.
    """
    status = self.check_update_status()
    if status == UPDATER_NEED_REBOOT:
        return self._verify_kernel_state()
    error_msg = ''
    if status == UPDATER_IDLE:
        # An idle engine means the update attempt ended; report why.
        error_msg = 'Update error: %s' % self._get_last_update_error()
    raise RootFSUpdateError(
            'Update engine status is %s (%s was expected). %s'
            % (status, UPDATER_NEED_REBOOT, error_msg))
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700589
590
def trigger_update(self):
    """Triggers a background update.

    A counter metric recording success or failure is incremented
    whether or not triggering the update succeeds.

    @raise RootFSUpdateError if the update engine isn't idle or the
            update can't be triggered.
    """
    # If this function is called immediately after reboot (which it
    # can be), there is no guarantee that the update engine is up
    # and running yet, so wait for it.
    self._wait_for_update_service()

    autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                      (_UPDATER_BIN, self.update_url))
    run_args = {'command': autoupdate_cmd}
    err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
    logging.info('Triggering update via: %s', autoupdate_cmd)
    succeeded = False
    try:
        self._base_update_handler(run_args, err_prefix)
        succeeded = True
    finally:
        counter = metrics.Counter('chromeos/autotest/autoupdater/trigger')
        metric_fields = {'success': succeeded}
        metric_fields.update(_get_metric_fields(self.update_url))
        counter.increment(fields=metric_fields)
611
612
def update_image(self):
    """Updates the device root FS and kernel and verifies success.

    A counter metric recording success or failure is incremented
    whether or not the update command succeeds.

    @return The result of `_verify_update_completed()`.
    @raise RootFSUpdateError if the update command fails or the update
            can't be verified as complete.
    """
    autoupdate_cmd = ('%s --update --omaha_url=%s' %
                      (_UPDATER_BIN, self.update_url))
    if not self.interactive:
        autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
    run_args = {'command': autoupdate_cmd, 'timeout': 3600}
    err_prefix = ('Failed to install device image using payload at %s '
                  'on %s. ' % (self.update_url, self.host.hostname))
    logging.info('Updating image via: %s', autoupdate_cmd)
    succeeded = False
    try:
        self._base_update_handler(run_args, err_prefix)
        succeeded = True
    finally:
        counter = metrics.Counter('chromeos/autotest/autoupdater/update')
        metric_fields = {'success': succeeded}
        metric_fields.update(_get_metric_fields(self.update_url))
        counter.increment(fields=metric_fields)
    return self._verify_update_completed()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700632
633
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700634 def _get_remote_script(self, script_name):
635 """Ensure that `script_name` is present on the DUT.
Chris Sosa5e4246b2012-05-22 18:05:22 -0700636
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700637 The given script (e.g. `stateful_update`) may be present in the
638 stateful partition under /usr/local/bin, or we may have to
639 download it from the devserver.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700640
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700641 Determine whether the script is present or must be downloaded
642 and download if necessary. Then, return a command fragment
643 sufficient to run the script from whereever it now lives on the
644 DUT.
Richard Barnette9d43e562018-06-05 17:20:10 +0000645
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700646 @param script_name The name of the script as expected in
647 /usr/local/bin and on the devserver.
648 @return A string with the command (minus arguments) that will
649 run the target script.
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700650 """
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700651 remote_script = '/usr/local/bin/%s' % script_name
652 if self.host.path_exists(remote_script):
653 return remote_script
654 remote_tmp_script = '/tmp/%s' % script_name
655 server_name = urlparse.urlparse(self.update_url)[1]
656 script_url = 'http://%s/static/%s' % (server_name, script_name)
657 fetch_script = (
658 'curl -o %s %s && head -1 %s | grep "^#!" | sed "s/#!//"') % (
659 remote_tmp_script, script_url, remote_tmp_script)
660 script_interpreter = self._run(fetch_script,
661 ignore_status=True).stdout.strip()
662 if not script_interpreter:
663 return None
664 return '%s %s' % (script_interpreter, remote_tmp_script)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700665
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700666
def _get_stateful_update_script(self):
    """Returns a command to run the stateful update script.

    Locates `stateful_update` on the DUT, downloading it first when
    necessary.

    @raise StatefulUpdateError if the script can't be found or
            installed.
    @return A string that can be joined with arguments to run the
            `stateful_update` command on the DUT.
    """
    command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
    if command:
        return command
    raise StatefulUpdateError('Could not install %s on DUT'
                              % _STATEFUL_UPDATE_SCRIPT)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700683
684
def rollback_rootfs(self, powerwash):
    """Trigger a rootfs rollback and wait for it to finish.

    @param powerwash: If true, powerwash as part of rollback.

    @raise RootFSUpdateError if the rollback precheck or the rollback
            command itself fails.
    """
    release = self.host.get_release_version()
    # `--can_rollback` was introduced in M36 (build 5772).  The release
    # version has the form X.Y.Z; only the leading component is needed
    # to decide whether the precheck is available.
    try:
        major = int(release.split('.')[0])
    except ValueError:
        logging.error('Could not parse build number.')
        major = 0

    if major >= 5772:
        logging.info('Checking for rollback.')
        try:
            self._run('%s --can_rollback' % _UPDATER_BIN)
        except error.AutoservRunError as e:
            raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                    (self.host.hostname, str(e)))

    # Powerwash is the default for the command below, so the flag is
    # only appended when the caller opts out.
    flags = ['--rollback', '--follow']
    if not powerwash:
        flags.append('--nopowerwash')
    rollback_cmd = '%s %s' % (_UPDATER_BIN, ' '.join(flags))

    logging.info('Performing rollback.')
    try:
        self._run(rollback_cmd)
    except error.AutoservRunError as e:
        raise RootFSUpdateError('Rollback failed on %s: %s' %
                                (self.host.hostname, str(e)))

    self._verify_update_completed()
722
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800723
def update_stateful(self, clobber=True):
    """Update the stateful partition on the DUT.

    @param clobber: If True, request a clean stateful installation.

    @raise StatefulUpdateError if the update script fails to
            complete successfully.
    """
    logging.info('Updating stateful partition...')
    # The stateful payload URL is derived from the update URL by
    # swapping 'update' for 'static'.
    payload_url = self.update_url.replace('update', 'static')

    # The stateful update must succeed so that the newly installed
    # host is testable after the update.
    script = self._get_stateful_update_script()
    clobber_args = ['--stateful_change=clean'] if clobber else []
    command = ' '.join([script, payload_url] + clobber_args + ['2>&1'])
    try:
        self._run(command, timeout=1200)
    except error.AutoservRunError:
        raise StatefulUpdateError(
                'Failed to perform stateful update on %s' %
                self.host.hostname)
Dale Curtis5c32c722011-05-04 19:24:23 -0700748
Chris Sosaa3ac2152012-05-23 22:23:13 -0700749
def verify_boot_expectations(self, expected_kernel, rollback_message):
    """Check that the DUT booted the expected kernel and marked it good.

    Two things are verified: that the active kernel is the expected
    one, and that the OS has subsequently marked that kernel as good.

    @param expected_kernel: kernel expected to be active, in the form
            returned as the first element of `get_kernel_state()`.
    @param rollback_message: string to include in the exception text
            if the DUT booted from the wrong partition.

    @raise NewBuildUpdateError if any of the checks fail.
    """
    booted_kernel = self.get_kernel_state()[0]

    # A mismatch means the DUT rolled back, presumably due to a bad
    # build.
    if booted_kernel != expected_kernel:
        # Kernel crash reports should be wiped between test runs, but
        # may persist from earlier parts of the test, or from problems
        # with provisioning.
        #
        # Kernel crash reports will NOT be present if the crash
        # happened before encrypted stateful is mounted.
        #
        # TODO(dgarrett): Integrate with server/crashcollect.py at some
        # point.
        #
        # NOTE(review): glob.glob() inspects the filesystem of the
        # machine running this code; confirm that is the intended
        # location of the crash reports.
        crash_reports = glob.glob('/var/spool/crash/kernel.*.kcrash')
        if crash_reports:
            rollback_message += ': kernel_crash'
            logging.debug('Found %d kernel crash reports:',
                          len(crash_reports))
            # The report names embed timestamps that may be useful,
            # e.g. kernel.20131207.005945.0.kcrash
            for report in crash_reports:
                logging.debug(' %s', os.path.basename(report))

        # Dump state that makes the rollback easier to debug.
        logging.debug('Dumping partition table.')
        self._run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self._run('crossystem --all')
        raise NewBuildUpdateError(self.update_version, rollback_message)

    def _kernel_marked_good():
        """True once the kernel has no tries left and is successful."""
        return (self._get_kernel_tries(booted_kernel) == 0
                and self._get_kernel_success(booted_kernel))

    # Wait for chromeos-setgoodkernel to run.
    try:
        utils.poll_for_condition(
                _kernel_marked_good,
                exception=RootFSUpdateError(),
                timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except RootFSUpdateError:
        # Use the state of system-services to pick which failure to
        # report.
        services_status = self._run('status system-services').stdout
        if services_status == 'system-services start/running\n':
            event = NewBuildUpdateError.UPDATE_ENGINE_FAILURE
        else:
            event = NewBuildUpdateError.CHROME_FAILURE
        raise NewBuildUpdateError(self.update_version, event)
Richard Barnette55d1af82018-05-22 23:40:14 +0000811
812
def _prepare_host(self):
    """Bring the target DUT into a known state ahead of the update.

    Initially, the target DUT's state is unknown.  The DUT is
    expected to be online, but we strive to be forgiving if Chrome
    and/or the update engine aren't fully functional.

    @raise HostUpdateError if the DUT is not up.
    """
    host = self.host
    # Nothing below can work if the DUT is offline.
    if not host.is_up():
        raise HostUpdateError(host.hostname, HostUpdateError.DUT_DOWN)

    self._reset_stateful_partition()
    # Reboot, because it's a good way to clear out problems.
    host.reboot(timeout=host.REBOOT_TIMEOUT)
    # Leave the PROVISION_FAILED marker so that repair can detect a
    # failure later.
    self._run('touch %s' % PROVISION_FAILED)
    # Hook for host class specific preparation.
    host.prepare_for_update()
    # If Chrome failed to start properly, the status of the
    # `update-engine` job is uncertain, so put it in a known state.
    self._reset_update_engine()
    logging.info('Updating from version %s to %s.',
                 host.get_release_version(), self.update_version)
841
842
def _verify_devserver(self):
    """Check that our chosen devserver is still working.

    The devserver address is taken from the network location of
    `self.update_url`.  Two failure modes are reported distinctly:
    the health check itself raising (devserver unreachable), and the
    health check completing but reporting an unhealthy server.

    @raise DevServerError if the devserver fails any sanity check.
    """
    server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
    try:
        healthy = dev_server.ImageServer.devserver_healthy(server)
    except Exception:
        # Record the underlying cause before replacing it with the
        # attributed error, so the diagnosis isn't lost.
        logging.exception('Devserver health check failed for %s', server)
        raise DevServerError(
                server, 'Devserver is not up and available')
    # This check must stay outside the `try` above; otherwise the
    # broad `except Exception` would swallow this error and replace
    # its message with the "not up" one.
    if not healthy:
        raise DevServerError(
                server, 'Devserver is not healthy')
Richard Barnette14ee84c2018-05-18 20:23:42 +0000856
857
def _install_via_update_engine(self):
    """Install an update using the production AU flow.

    This uses the standard AU flow and the `stateful_update` script
    to download and install a root FS, kernel and stateful
    filesystem content.

    @return The kernel expected to be booted next.
    """
    logging.info('Installing image using update_engine.')
    # The kernel reported by the rootfs update is what gets returned;
    # the stateful update and version bookkeeping follow it.
    next_kernel = self.update_image()
    self.update_stateful()
    self._set_target_version()
    return next_kernel
872
873
def _install_via_quick_provision(self):
    """Install an update using the `quick-provision` script.

    This uses the `quick-provision` script to download and install
    a root FS, kernel and stateful filesystem content.

    The method is best-effort: whenever quick-provision is not
    eligible, cannot be installed on the DUT, or fails while running,
    `None` is returned so that the caller can fall back to the
    update_engine flow.

    @return The kernel expected to be booted next, or `None` if
            quick-provision was not used or did not succeed.
    """
    build_re = global_config.global_config.get_config_value(
            'CROS', 'quick_provision_build_regex', type=str, default='')
    image_name = url_to_image_name(self.update_url)
    if not build_re or re.match(build_re, image_name) is None:
        logging.info('Not eligible for quick-provision.')
        return None
    logging.info('Installing image using quick-provision.')
    provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
    if not provision_command:
        # `_get_remote_script()` returns None when the script can't
        # be found or fetched.  Nothing has been changed on the DUT
        # at this point, so skip straight to the fallback rather than
        # running a bogus "None ..." command line.
        logging.warning('quick-provision script could not be installed; '
                        'will fall back to update_engine.')
        return None
    server_name = urlparse.urlparse(self.update_url)[1]
    static_url = 'http://%s/static' % server_name
    command = '%s --noreboot %s %s' % (
            provision_command, image_name, static_url)
    try:
        self._run(command)
        self._set_target_version()
        return self._verify_kernel_state()
    except Exception:
        # N.B.  We handle only `Exception` here.  Non-Exception
        # classes (such as KeyboardInterrupt) are handled by our
        # caller.
        logging.exception('quick-provision script failed; '
                          'will fall back to update_engine.')
        # Undo whatever the partial run may have changed before the
        # caller retries with update_engine.
        self._revert_boot_partition()
        self._reset_stateful_partition()
        self._reset_update_engine()
        return None
908
909
def _install_update(self):
    """Install the requested image on the DUT, but don't start it.

    This downloads and installs a root FS, kernel and stateful
    filesystem content.  This does not reboot the DUT, so the update
    is merely pending when the method returns.

    quick-provision is tried first; if it yields no kernel, the
    update_engine flow is used instead.

    @return The kernel expected to be booted next.
    """
    logging.info('Installing image at %s onto %s',
                 self.update_url, self.host.hostname)
    try:
        expected_kernel = self._install_via_quick_provision()
        if not expected_kernel:
            expected_kernel = self._install_via_update_engine()
        return expected_kernel
    except:
        # N.B. The bare `except` is deliberate: it also covers
        # non-Exception classes such as KeyboardInterrupt.  We need
        # to clean up, but we also must re-raise.
        self._revert_boot_partition()
        self._reset_stateful_partition()
        self._reset_update_engine()
        # Collect update engine logs in the event of failure.
        job = self.host.job
        if job:
            logging.info('Collecting update engine logs due to failure...')
            self.host.get_file(
                    _UPDATER_LOGS, job.sysinfo.sysinfodir,
                    preserve_perm=False)
        _list_image_dir_contents(self.update_url)
        raise
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200939
940
def _complete_update(self, expected_kernel):
    """Reboot into the newly installed build and confirm it works.

    Initial condition is that the target build has been downloaded
    and installed on the DUT, but has not yet been booted.  This
    function is responsible for rebooting the DUT, and checking that
    the new build is running successfully.

    @param expected_kernel: kernel expected to be active after reboot.
    """
    # In some cases, the update flow puts the TPM into a state such
    # that it fails verification.  We don't know why.  Requesting a
    # TPM owner clear papers over the problem during the reboot.
    #
    # Failures from 'crossystem' are ignored.  Although failure here
    # is unexpected, and could signal a bug, the point of the exercise
    # is to paper over problems; allowing this to fail would defeat
    # the purpose.
    self._run('crossystem clear_tpm_owner_request=1', ignore_status=True)
    self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

    # Touch the lab machine file to leave a marker that distinguishes
    # this image from other test images.  When the marker had to be
    # created, the autoreboot job is started afterwards because it
    # depends on the marker file.
    marker_cmd = ('FILE="%s" ; [ -f "$FILE" ] || '
                  '( touch "$FILE" ; start autoreboot )') % _LAB_MACHINE_FILE
    self._run(marker_cmd)
    self.verify_boot_expectations(
            expected_kernel, NewBuildUpdateError.ROLLBACK_FAILURE)

    logging.debug('Cleaning up old autotest directories.')
    try:
        stale_autodir = autotest.Autotest.get_installed_autodir(self.host)
        self._run('rm -rf ' + stale_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
982
983
def _run_update_steps(self):
    """Perform a full update of a DUT, with diagnosis for failures.

    Run the individual steps of the update.  If a step fails, make
    sure that the exception raised describes the failure with a
    diagnosis based on the step that failed.  Exceptions that are
    already `_AttributedUpdateError` instances pass through unchanged;
    any other `Exception` is logged and wrapped.

    @raise HostUpdateError if a failure is caused by a problem on
            the DUT prior to the update.
    @raise ImageInstallError if a failure occurs during download
            and install of the update and cannot be definitively
            blamed on either the DUT or the devserver.
    @raise NewBuildUpdateError if a failure occurs because the
            new build fails to function correctly.
    """
    self._verify_devserver()

    # Step 1: get the DUT ready.  Unattributed failures are blamed on
    # the host.
    try:
        self._prepare_host()
    except Exception as e:
        if isinstance(e, _AttributedUpdateError):
            raise
        logging.exception('Failure preparing host prior to update.')
        raise HostUpdateError(self.host.hostname, str(e))

    # Step 2: download and install.  Unattributed failures name both
    # the DUT and the devserver.
    try:
        expected_kernel = self._install_update()
    except Exception as e:
        if isinstance(e, _AttributedUpdateError):
            raise
        logging.exception('Failure during download and install.')
        server_name = dev_server.get_resolved_hostname(self.update_url)
        raise ImageInstallError(self.host.hostname, server_name, str(e))

    # Step 3: boot and verify the new build.  Unattributed failures
    # are blamed on the build.
    try:
        self._complete_update(expected_kernel)
    except Exception as e:
        if isinstance(e, _AttributedUpdateError):
            raise
        logging.exception('Failure from build after update.')
        raise NewBuildUpdateError(self.update_version, str(e))
Richard Barnette0beb14b2018-05-15 18:07:52 +00001025
Richard Barnette621a8e42018-06-25 17:34:11 -07001026
def run_update(self):
    """Perform a full update of a DUT in the test lab.

    This downloads and installs the root FS and stateful partition
    content needed for the update specified in `self.host` and
    `self.update_url`.  The update is performed according to the
    requirements for provisioning a DUT for testing the requested
    build.

    Metrics describing the outcome are reported whether the update
    succeeds or fails; on failure the original exception is re-raised
    after the failure reason has been recorded.

    @returns A tuple of the form `(image_name, attributes)`, where
            `image_name` is the name of the image installed, and
            `attributes` is new attributes to be applied to the DUT.
    """
    start_time = time.time()
    failure_reason = None
    server_name = dev_server.get_resolved_hostname(self.update_url)
    install_counter = metrics.Counter(_metric_name('install'))
    install_counter.increment(fields={'devserver': server_name})
    try:
        self._run_update_steps()
    except _AttributedUpdateError as e:
        failure_reason = e.failure_summary
        raise
    except Exception:
        failure_reason = 'Unknown failure'
        raise
    finally:
        # Runs on success and on failure alike; `failure_reason` is
        # still None when the update succeeded.
        elapsed = time.time() - start_time
        _emit_updater_metrics(
                self.update_url, self.host.hostname,
                failure_reason, elapsed)

    image_name = url_to_image_name(self.update_url)
    # update_url is different from the devserver url needed to stage
    # autotest packages, therefore, resolve a new devserver url here.
    image_server = dev_server.ImageServer.resolve(
            image_name, self.host.hostname)
    repo_url = tools.get_package_url(image_server.url(), image_name)
    return image_name, {ds_constants.JOB_REPO_URL: repo_url}