blob: 9d86e5cb7a68f07e234532a9a3084fa60c5a9e3c [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
Don Garrett56b1cc82013-12-06 17:49:20 -08005import glob
Sean O'Connor5346e4e2010-08-12 18:49:24 +02006import logging
Dale Curtis5c32c722011-05-04 19:24:23 -07007import os
Sean O'Connor5346e4e2010-08-12 18:49:24 +02008import re
Richard Barnette621a8e42018-06-25 17:34:11 -07009import time
Prashanth B32baa9b2014-03-13 13:23:01 -070010import urllib2
Richard Barnette0beb14b2018-05-15 18:07:52 +000011import urlparse
Sean O'Connor5346e4e2010-08-12 18:49:24 +020012
Chris Sosa65425082013-10-16 13:26:22 -070013from autotest_lib.client.bin import utils
Dale Curtis5c32c722011-05-04 19:24:23 -070014from autotest_lib.client.common_lib import error, global_config
Prashanth B32baa9b2014-03-13 13:23:01 -070015from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette0beb14b2018-05-15 18:07:52 +000016from autotest_lib.server import autotest
Shelley Chen61d28982016-10-28 09:40:20 -070017from autotest_lib.server import utils as server_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000018from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
19from autotest_lib.server.cros.dynamic_suite import tools
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -080020from chromite.lib import retry_util
Dan Shif3a35f72016-01-25 11:18:14 -080021
# Prefer the real chromite metrics library; when it cannot be imported,
# fall back to the mock object exposed by the client `utils` module.
try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock
Sean O'Connor5346e4e2010-08-12 18:49:24 +020026
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070027
Richard Barnette621a8e42018-06-25 17:34:11 -070028def _metric_name(base_name):
29 return 'chromeos/autotest/provision/' + base_name
30
31
# Update engine states, as they appear after "CURRENT_OP=" in the output
# of `update_engine_client -status`.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
# NOTE(review): 'UPDATE_STATUS_CHECKING_FORUPDATE' looks like it is missing
# an underscore ('..._FOR_UPDATE'); confirm against the strings that
# update_engine actually reports before changing it.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']
Sean O'Connor5346e4e2010-08-12 18:49:24 +020040
Richard Barnette0beb14b2018-05-15 18:07:52 +000041
Richard Barnette3e8b2282018-05-15 20:42:20 +000042_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
Richard Barnettee86b1ce2018-06-07 10:37:23 -070043_QUICK_PROVISION_SCRIPT = 'quick-provision'
Richard Barnette3e8b2282018-05-15 20:42:20 +000044
45_UPDATER_BIN = '/usr/bin/update_engine_client'
46_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']
47
48_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
49_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
50
51# Time to wait for new kernel to be marked successful after
52# auto update.
53_KERNEL_UPDATE_TIMEOUT = 120
54
55
# PROVISION_FAILED - A flag file to indicate provision failures.  The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater._prepare_host()`).  The file's location in
# stateful means that on successful update it will be removed.  Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
PROVISION_FAILED = '/var/tmp/provision_failed'
63
64
Richard Barnette3e8b2282018-05-15 20:42:20 +000065# A flag file used to enable special handling in lab DUTs. Some
66# parts of the system in Chromium OS test images will behave in ways
67# convenient to the test lab when this file is present. Generally,
68# we create this immediately after any update completes.
69_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
70
71
Richard Barnette3ef29a82018-06-28 13:52:54 -070072# _TARGET_VERSION - A file containing the new version to which we plan
73# to update. This file is used by the CrOS shutdown code to detect and
74# handle certain version downgrade cases. Specifically: Downgrading
75# may trigger an unwanted powerwash in the target build when the
76# following conditions are met:
77# * Source build is a v4.4 kernel with R69-10756.0.0 or later.
78# * Target build predates the R69-10756.0.0 cutoff.
79# When this file is present and indicates a downgrade, the OS shutdown
80# code on the DUT knows how to prevent the powerwash.
81_TARGET_VERSION = '/run/update_target_version'
82
83
class RootFSUpdateError(error.TestFail):
    """Raised when the RootFS fails to update."""
Chris Sosa77556d82012-04-05 15:23:14 -070086
87
class StatefulUpdateError(error.TestFail):
    """Raised when the stateful partition fails to update."""
Chris Sosa77556d82012-04-05 15:23:14 -070090
91
class _AttributedUpdateError(error.TestFail):
    """Update failure with an attributed cause."""

    def __init__(self, attribution, msg):
        """Build the exception message as '<attribution>: <msg>'.

        @param attribution: Text naming the party blamed for the failure.
        @param msg: Description of the failure itself.
        """
        super(_AttributedUpdateError, self).__init__(
            '%s: %s' % (attribution, msg))
98
99
class HostUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the DUT.

    This class of exception should be raised when the most likely cause
    of failure was a condition existing on the DUT prior to the update,
    such as a hardware problem, or a bug in the software on the DUT.
    """

    def __init__(self, hostname, msg):
        """Initialize the exception.

        @param hostname: Name of the DUT the failure is attributed to.
        @param msg: Description of the failure.
        """
        super(HostUpdateError, self).__init__(
            'Error on %s prior to update' % hostname, msg)

    @property
    def failure_summary(self):
        """Fixed, host-independent description of this failure class."""
        return 'DUT failed prior to update'
116
Richard Barnette9d43e562018-06-05 17:20:10 +0000117
class DevServerError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the devserver.

    This class of exception should be raised when the most likely cause
    of failure was the devserver serving the target image for update.
    """

    def __init__(self, devserver, msg):
        """Initialize the exception.

        @param devserver: Name of the devserver the failure is attributed to.
        @param msg: Description of the failure.
        """
        super(DevServerError, self).__init__(
            'Devserver error on %s' % devserver, msg)

    @property
    def failure_summary(self):
        """Fixed, host-independent description of this failure class."""
        return 'Devserver failed prior to update'
133
Richard Barnette9d43e562018-06-05 17:20:10 +0000134
class ImageInstallError(_AttributedUpdateError):
    """Failure updating a DUT when installing from the devserver.

    This class of exception should be raised when the target DUT fails
    to download and install the target image from the devserver, and
    either the devserver or the DUT might be at fault.
    """

    def __init__(self, hostname, devserver, msg):
        """Initialize the exception.

        @param hostname: Name of the DUT being updated.
        @param devserver: Name of the devserver serving the image.
        @param msg: Description of the failure.
        """
        super(ImageInstallError, self).__init__(
            'Download and install failed from %s onto %s'
            % (devserver, hostname), msg)

    @property
    def failure_summary(self):
        """Fixed, host-independent description of this failure class."""
        return 'Image failed to download and install'
152
Richard Barnette9d43e562018-06-05 17:20:10 +0000153
class NewBuildUpdateError(_AttributedUpdateError):
    """Failure updating a DUT attributable to the target build.

    This class of exception should be raised when updating to a new
    build fails, and the most likely cause of the failure is a bug in
    the newly installed target build.
    """

    def __init__(self, update_version, msg):
        """Initialize the exception.

        @param update_version: Version of the build that was installed.
        @param msg: Description of the failure.
        """
        super(NewBuildUpdateError, self).__init__(
            'Failure in build %s' % update_version, msg)

    @property
    def failure_summary(self):
        """Fixed, host-independent description of this failure class."""
        return 'Build failed to work after installing'
170
Richard Barnette9d43e562018-06-05 17:20:10 +0000171
Richard Barnette3e8b2282018-05-15 20:42:20 +0000172def _url_to_version(update_url):
Dan Shi0f466e82013-02-22 15:44:58 -0800173 """Return the version based on update_url.
174
175 @param update_url: url to the image to update to.
176
177 """
Dale Curtisddfdb942011-07-14 13:59:24 -0700178 # The Chrome OS version is generally the last element in the URL. The only
179 # exception is delta update URLs, which are rooted under the version; e.g.,
180 # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
181 # strip off the au section of the path before reading the version.
Dan Shi5002cfc2013-04-29 10:45:05 -0700182 return re.sub('/au/.*', '',
183 urlparse.urlparse(update_url).path).split('/')[-1].strip()
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200184
185
def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    # Ignore a trailing '/' so the last two elements are always the
    # builder and the version, never an empty string.
    path = urlparse.urlparse(update_url).path.rstrip('/')
    return '/'.join(path.split('/')[-2:])
198
199
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The CROS/image_url_pattern global config value is turned into a regular
    expression by replacing each '%s' with a capturing group, and that
    expression is searched for in `update_url`.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple of the groups matched in the url; for the expected
        pattern this is (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    # Raw string: '\S' is not a valid escape in an ordinary string literal.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
223
224
Richard Barnette3e8b2282018-05-15 20:42:20 +0000225def _list_image_dir_contents(update_url):
Prashanth B32baa9b2014-03-13 13:23:01 -0700226 """Lists the contents of the devserver for a given build/update_url.
227
228 @param update_url: An update url. Eg: http://devserver:port/update/build.
229 """
230 if not update_url:
231 logging.warning('Need update_url to list contents of the devserver.')
232 return
233 error_msg = 'Cannot check contents of devserver, update url %s' % update_url
234 try:
235 devserver_url, build = _get_devserver_build_from_update_url(update_url)
236 except ValueError as e:
237 logging.warning('%s: %s', error_msg, e)
238 return
239 devserver = dev_server.ImageServer(devserver_url)
240 try:
241 devserver.list_image_dir(build)
242 # The devserver will retry on URLError to avoid flaky connections, but will
243 # eventually raise the URLError if it persists. All HTTPErrors get
244 # converted to DevServerExceptions.
245 except (dev_server.DevServerException, urllib2.URLError) as e:
246 logging.warning('%s: %s', error_msg, e)
247
248
def _get_metric_fields(update_url):
    """Return a dict of metric fields.

    This is used for sending autoupdate metrics for the given update URL.
    If the build name in the URL cannot be parsed, the 'board',
    'build_type' and 'milestone' fields are empty strings.

    @param update_url  Metrics fields will be calculated from this URL.

    @return A dict with the keys 'dev_server', 'board', 'build_type' and
            'milestone'.
    """
    build_name = url_to_image_name(update_url)
    try:
        board, build_type, milestone, _ = server_utils.ParseBuildName(
            build_name)
    except server_utils.ParseBuildNameException:
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', build_name)
        board, build_type, milestone = ('', '', '')
    return {
        'dev_server': dev_server.get_resolved_hostname(update_url),
        'board': board,
        'build_type': build_type,
        'milestone': milestone,
    }
270
271
def _emit_provision_metrics(name_prefix, build_name, failure_reason,
                            duration, fields):
    """Emit the build, failure reason and duration metrics for one event.

    @param name_prefix: Prefix of the metric names; each metric is named
                        '<name_prefix>_<what>_by_devserver_dut'.
    @param build_name: Value recorded in the build metric.
    @param failure_reason: Value recorded in the failure reason metric;
                           that metric is skipped when this is false.
    @param duration: Value added to the duration distribution.
    @param fields: Dict of metric fields attached to every metric.
    """
    # reset_after=True is required for String gauges events to ensure that
    # the metrics are not repeatedly emitted until the server restarts.
    metrics.String(_metric_name(name_prefix + '_build_by_devserver_dut'),
                   reset_after=True).set(build_name, fields=fields)
    if failure_reason:
        metrics.String(
                _metric_name(name_prefix + '_failure_reason_by_devserver_dut'),
                reset_after=True).set(failure_reason, fields=fields)
    metrics.SecondsDistribution(
            _metric_name(name_prefix + '_duration_by_devserver_dut')).add(
                    duration, fields=fields)
285
286
def _emit_updater_metrics(update_url, dut_host_name,
                          failure_reason, duration):
    """Send metrics for one provision request.

    @param update_url: URL of the update; the board, build type, devserver
                       and build name are derived from it.
    @param dut_host_name: Host name of the DUT that was provisioned.
    @param failure_reason: Description of the failure, or a false value
                           if provisioning succeeded.
    @param duration: Duration of the provision request.
    """
    # The following is high cardinality, but sparse.
    # Each DUT is of a single board type, and likely build type.
    #
    # TODO(jrbarnette) The devserver-triggered provisioning code
    # included retries in certain cases.  For that reason, the metrics
    # distinguished 'provision' metrics which summarized across all
    # retries, and 'auto_update' which summarized an individual update
    # attempt.  ChromiumOSUpdater doesn't do retries, so we just report
    # the same information twice.
    image_fields = _get_metric_fields(update_url)
    fields = {
        'board': image_fields['board'],
        'build_type': image_fields['build_type'],
        'dut_host_name': dut_host_name,
        'dev_server': image_fields['dev_server'],
        'success': not failure_reason,
    }
    build_name = url_to_image_name(update_url)
    _emit_provision_metrics('auto_update', build_name, failure_reason,
                            duration, fields)
    # Only the 'provision' metrics carry an attempt number.
    fields['attempt'] = 1
    _emit_provision_metrics('provision', build_name, failure_reason,
                            duration, fields)
Richard Barnette621a8e42018-06-25 17:34:11 -0700313
314
# TODO(garnold) This implements shared updater functionality needed for
# supporting the autoupdate_EndToEnd server-side test. We should probably
# migrate more of the existing ChromiumOSUpdater functionality to it as we
# expand non-CrOS support in other tests.
Richard Barnette3e8b2282018-05-15 20:42:20 +0000319class ChromiumOSUpdater(object):
320 """Chromium OS specific DUT update functionality."""
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700321
def __init__(self, update_url, host=None, interactive=True):
    """Initializes the object.

    The target version is derived from `update_url` and stored as
    `self.update_version`.

    @param update_url: The URL we want the update to use.
    @param host: A client.common_lib.hosts.Host implementation.
    @param interactive: Bool whether we are doing an interactive update.
    """
    self.update_url = update_url
    self.host = host
    self.interactive = interactive
    self.update_version = _url_to_version(update_url)
333
334
335 def _run(self, cmd, *args, **kwargs):
336 """Abbreviated form of self.host.run(...)"""
337 return self.host.run(cmd, *args, **kwargs)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700338
339
def check_update_status(self):
    """Returns the current update engine state.

    We use the `update_engine_client -status' command and parse the line
    indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".

    @return The text following the last '=' on the CURRENT_OP line.
    """
    update_status = self.host.run(command='%s -status | grep CURRENT_OP' %
                                  _UPDATER_BIN)
    return update_status.stdout.strip().split('=')[-1]
349
350
Richard Barnette55d1af82018-05-22 23:40:14 +0000351 def _rootdev(self, options=''):
352 """Returns the stripped output of rootdev <options>.
353
354 @param options: options to run rootdev.
355
356 """
357 return self._run('rootdev %s' % options).stdout.strip()
358
359
def get_kernel_state(self):
    """Returns the (<active>, <inactive>) kernel state as a pair.

    The active root partition number is read from the trailing digits of
    the `rootdev -s` output.

    @raise RootFSUpdateError if the DUT reports a root partition
            number that isn't one of the known valid values, or if no
            partition number can be found in the rootdev output.
    """
    rootdev_output = self._rootdev('-s')
    partition = re.search(r'\d+\Z', rootdev_output)
    if partition is None:
        # Report unparseable output with the documented exception type
        # rather than letting an IndexError escape.
        raise RootFSUpdateError(
                'Encountered unknown root partition: %s' % rootdev_output)
    active_root = int(partition.group(0))
    if active_root == _KERNEL_A['root']:
        return _KERNEL_A, _KERNEL_B
    elif active_root == _KERNEL_B['root']:
        return _KERNEL_B, _KERNEL_A
    else:
        raise RootFSUpdateError(
                'Encountered unknown root partition: %s' % active_root)
Richard Barnette55d1af82018-05-22 23:40:14 +0000374
375
Richard Barnette18fd5842018-05-25 18:21:14 +0000376 def _cgpt(self, flag, kernel):
377 """Return numeric cgpt value for the specified flag, kernel, device."""
378 return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
379 kernel['kernel'], flag)).stdout.strip())
Richard Barnette55d1af82018-05-22 23:40:14 +0000380
381
def _get_next_kernel(self):
    """Return the kernel that has priority for the next boot.

    @return _KERNEL_A if its cgpt priority is strictly greater than that
            of kernel B, otherwise _KERNEL_B (including on a tie).
    """
    priority_a = self._cgpt('-P', _KERNEL_A)
    priority_b = self._cgpt('-P', _KERNEL_B)
    if priority_a > priority_b:
        return _KERNEL_A
    else:
        return _KERNEL_B
390
391
392 def _get_kernel_success(self, kernel):
393 """Return boolean success flag for the specified kernel.
394
395 @param kernel: information of the given kernel, either _KERNEL_A
396 or _KERNEL_B.
397 """
398 return self._cgpt('-S', kernel) != 0
399
400
401 def _get_kernel_tries(self, kernel):
402 """Return tries count for the specified kernel.
403
404 @param kernel: information of the given kernel, either _KERNEL_A
405 or _KERNEL_B.
406 """
407 return self._cgpt('-T', kernel)
408
409
def _get_last_update_error(self):
    """Get the last autoupdate error code.

    @return The output of `update_engine_client --last_attempt_error`,
            stripped, with its lines joined by ', '.
    """
    command_result = self._run(
             '%s --last_attempt_error' % _UPDATER_BIN)
    return command_result.stdout.strip().replace('\n', ', ')
Shuqian Zhaod9992722016-02-29 12:26:38 -0800415
416
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800417 def _base_update_handler_no_retry(self, run_args):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800418 """Base function to handle a remote update ssh call.
419
420 @param run_args: Dictionary of args passed to ssh_host.run function.
Shuqian Zhaod9992722016-02-29 12:26:38 -0800421
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800422 @throws: intercepts and re-throws all exceptions
Shuqian Zhaod9992722016-02-29 12:26:38 -0800423 """
Shuqian Zhaod9992722016-02-29 12:26:38 -0800424 try:
425 self.host.run(**run_args)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800426 except Exception as e:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800427 logging.debug('exception in update handler: %s', e)
428 raise e
Shuqian Zhaod9992722016-02-29 12:26:38 -0800429
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800430
def _base_update_handler(self, run_args, err_msg_prefix=None):
    """Handle a remote update ssh call, possibly with retries.

    @param run_args: Dictionary of args passed to ssh_host.run function.
    @param err_msg_prefix: Prefix of the exception error message; when
                           None, the message is used without a prefix.

    @raise RootFSUpdateError if the call still fails after the retry;
            the message includes the text of the underlying exception.
    """
    def exception_handler(e):
        """Examines exceptions and returns True if the update handler
        should be retried.

        @param e: the exception intercepted by the retry util.
        """
        return (isinstance(e, error.AutoservSSHTimeout) or
                (isinstance(e, error.GenericHostRunError) and
                 hasattr(e, 'description') and
                 (re.search('ERROR_CODE=37', e.description) or
                  re.search('generic error .255.', e.description))))

    try:
        # Try the update twice (arg 2 is max_retry, not including the first
        # call).  Some exceptions may be caught by the retry handler.
        retry_util.GenericRetry(exception_handler, 1,
                                self._base_update_handler_no_retry,
                                run_args)
    except Exception as e:
        message = str(e)
        # The prefix defaults to None; concatenating it unconditionally
        # would raise TypeError here and hide the real failure.
        if err_msg_prefix is not None:
            message = err_msg_prefix + ': ' + message
        raise RootFSUpdateError(message)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800458
459
def _wait_for_update_service(self):
    """Ensure that the update engine daemon is running, possibly
    by waiting for it a bit in case the DUT just rebooted and the
    service hasn't started yet.

    @raise RootFSUpdateError if the update engine reports a status
            other than UPDATER_IDLE.
    """
    def handler(e):
        """Retry exception handler.

        Assumes that the error is due to the update service not having
        started yet.

        @param e: the exception intercepted by the retry util.
        """
        if not isinstance(e, error.AutoservRunError):
            return False
        logging.debug('update service check exception: %s\n'
                      'retrying...', e)
        return True

    # Retry at most three times, every 5s.
    status = retry_util.GenericRetry(handler, 3,
                                     self.check_update_status,
                                     sleep=5)

    # Expect the update engine to be idle.
    if status != UPDATER_IDLE:
        raise RootFSUpdateError(
                'Update engine status is %s (%s was expected).'
                % (status, UPDATER_IDLE))
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800490
491
Richard Barnette55d1af82018-05-22 23:40:14 +0000492 def _reset_update_engine(self):
493 """Resets the host to prepare for a clean update regardless of state."""
494 self._run('stop ui || true')
495 self._run('stop update-engine || true')
496 self._run('start update-engine')
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800497 self._wait_for_update_service()
498
Richard Barnette55d1af82018-05-22 23:40:14 +0000499
def _reset_stateful_partition(self):
    """Clear any pending stateful update request.

    Also removes the target version file, if present.

    @raise StatefulUpdateError if the stateful update script can't be
            found or installed on the DUT.
    """
    self._run('%s --stateful_change=reset 2>&1'
              % self._get_stateful_update_script())
    self._run('rm -f %s' % _TARGET_VERSION)
505
506
def _set_target_version(self):
    """Set the "target version" for the update.

    Writes the version number to the target version file on the DUT.
    For a version like 'R27-3837.0.0' the number is the part after the
    milestone ('3837.0.0'); a version with no '-' is written unchanged.
    """
    _, dash, remainder = self.update_version.partition('-')
    if dash:
        version_number = remainder.split('-')[0]
    else:
        # No milestone prefix (e.g. '0.14.755.0'); indexing the split
        # result would raise IndexError, so use the version as given.
        version_number = self.update_version
    self._run('echo %s > %s' % (version_number, _TARGET_VERSION))
Richard Barnette55d1af82018-05-22 23:40:14 +0000511
512
513 def _revert_boot_partition(self):
514 """Revert the boot partition."""
515 part = self._rootdev('-s')
516 logging.warning('Reverting update; Boot partition will be %s', part)
517 return self._run('/postinst %s 2>&1' % part)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700518
519
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700520 def _verify_kernel_state(self):
521 """Verify that the next kernel to boot is correct for update.
522
523 This tests that the kernel state is correct for a successfully
524 downloaded and installed update. That is, the next kernel to
525 boot must be the currently inactive kernel.
526
527 @raise RootFSUpdateError if the DUT next kernel isn't the
528 expected next kernel.
529 """
530 inactive_kernel = self.get_kernel_state()[1]
531 next_kernel = self._get_next_kernel()
532 if next_kernel != inactive_kernel:
533 raise RootFSUpdateError(
534 'Update failed. The kernel for next boot is %s, '
535 'but %s was expected.'
536 % (next_kernel['name'], inactive_kernel['name']))
537 return inactive_kernel
538
539
def _verify_update_completed(self):
    """Verifies that an update has completed.

    @return The inactive kernel, as returned by _verify_kernel_state().

    @raise RootFSUpdateError if the DUT doesn't indicate that
            download is complete and the DUT is ready for reboot.
    """
    status = self.check_update_status()
    if status != UPDATER_NEED_REBOOT:
        error_msg = ''
        # The last attempt error is only fetched when the engine has
        # gone back to idle.
        if status == UPDATER_IDLE:
            error_msg = 'Update error: %s' % self._get_last_update_error()
        raise RootFSUpdateError(
                'Update engine status is %s (%s was expected).  %s'
                % (status, UPDATER_NEED_REBOOT, error_msg))
    return self._verify_kernel_state()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700555
556
def trigger_update(self):
    """Triggers a background update.

    A 'chromeos/autotest/autoupdater/trigger' counter is incremented
    whether or not the trigger succeeds, with a 'success' field
    recording the outcome.

    @raise RootFSUpdateError if the update engine isn't idle, or if the
            update check command fails.
    """
    # If this function is called immediately after reboot (which it
    # can be), there is no guarantee that the update engine is up
    # and running yet, so wait for it.
    self._wait_for_update_service()

    autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                      (_UPDATER_BIN, self.update_url))
    run_args = {'command': autoupdate_cmd}
    err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
    logging.info('Triggering update via: %s', autoupdate_cmd)
    metric_fields = {'success': False}
    try:
        self._base_update_handler(run_args, err_prefix)
        metric_fields['success'] = True
    finally:
        c = metrics.Counter('chromeos/autotest/autoupdater/trigger')
        metric_fields.update(_get_metric_fields(self.update_url))
        c.increment(fields=metric_fields)
577
578
def update_image(self):
    """Updates the device root FS and kernel and verifies success.

    A 'chromeos/autotest/autoupdater/update' counter is incremented
    whether or not the update command succeeds, with a 'success' field
    recording the outcome.

    @return The inactive kernel, as returned by
            _verify_update_completed().

    @raise RootFSUpdateError if the update command fails or the DUT
            isn't left ready for reboot into the new kernel.
    """
    autoupdate_cmd = ('%s --update --omaha_url=%s' %
                      (_UPDATER_BIN, self.update_url))
    if not self.interactive:
        autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
    run_args = {'command': autoupdate_cmd, 'timeout': 3600}
    err_prefix = ('Failed to install device image using payload at %s '
                  'on %s. ' % (self.update_url, self.host.hostname))
    logging.info('Updating image via: %s', autoupdate_cmd)
    metric_fields = {'success': False}
    try:
        self._base_update_handler(run_args, err_prefix)
        metric_fields['success'] = True
    finally:
        c = metrics.Counter('chromeos/autotest/autoupdater/update')
        metric_fields.update(_get_metric_fields(self.update_url))
        c.increment(fields=metric_fields)
    return self._verify_update_completed()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700598
599
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700600 def _get_remote_script(self, script_name):
601 """Ensure that `script_name` is present on the DUT.
Chris Sosa5e4246b2012-05-22 18:05:22 -0700602
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700603 The given script (e.g. `stateful_update`) may be present in the
604 stateful partition under /usr/local/bin, or we may have to
605 download it from the devserver.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700606
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700607 Determine whether the script is present or must be downloaded
608 and download if necessary. Then, return a command fragment
609 sufficient to run the script from whereever it now lives on the
610 DUT.
Richard Barnette9d43e562018-06-05 17:20:10 +0000611
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700612 @param script_name The name of the script as expected in
613 /usr/local/bin and on the devserver.
614 @return A string with the command (minus arguments) that will
615 run the target script.
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700616 """
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700617 remote_script = '/usr/local/bin/%s' % script_name
618 if self.host.path_exists(remote_script):
619 return remote_script
620 remote_tmp_script = '/tmp/%s' % script_name
621 server_name = urlparse.urlparse(self.update_url)[1]
622 script_url = 'http://%s/static/%s' % (server_name, script_name)
623 fetch_script = (
624 'curl -o %s %s && head -1 %s | grep "^#!" | sed "s/#!//"') % (
625 remote_tmp_script, script_url, remote_tmp_script)
626 script_interpreter = self._run(fetch_script,
627 ignore_status=True).stdout.strip()
628 if not script_interpreter:
629 return None
630 return '%s %s' % (script_interpreter, remote_tmp_script)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700631
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700632
def _get_stateful_update_script(self):
    """Returns a command to run the stateful update script.

    Find `stateful_update` on the target or install it, as
    necessary.  If installation fails, raise an exception.

    @raise StatefulUpdateError if the script can't be found or
            installed.
    @return A string that can be joined with arguments to run the
            `stateful_update` command on the DUT.
    """
    script_command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
    if not script_command:
        raise StatefulUpdateError('Could not install %s on DUT'
                                  % _STATEFUL_UPDATE_SCRIPT)
    return script_command
Chris Sosa5e4246b2012-05-22 18:05:22 -0700649
650
def rollback_rootfs(self, powerwash):
    """Triggers rollback and waits for it to complete.

    On builds numbered 5772 or later, `--can_rollback` is checked first
    and a failure of that check aborts the rollback.

    @param powerwash: If true, powerwash as part of rollback.

    @raise RootFSUpdateError if anything went wrong.
    """
    version = self.host.get_release_version()
    # Introduced can_rollback in M36 (build 5772). # etc/lsb-release matches
    # X.Y.Z. This version split just pulls the first part out.
    try:
        build_number = int(version.split('.')[0])
    except ValueError:
        # An unparseable build number skips the can_rollback check below.
        logging.error('Could not parse build number.')
        build_number = 0

    if build_number >= 5772:
        can_rollback_cmd = '%s --can_rollback' % _UPDATER_BIN
        logging.info('Checking for rollback.')
        try:
            self._run(can_rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                    (self.host.hostname, str(e)))

    rollback_cmd = '%s --rollback --follow' % _UPDATER_BIN
    if not powerwash:
        rollback_cmd += ' --nopowerwash'

    logging.info('Performing rollback.')
    try:
        self._run(rollback_cmd)
    except error.AutoservRunError as e:
        raise RootFSUpdateError('Rollback failed on %s: %s' %
                                (self.host.hostname, str(e)))

    self._verify_update_completed()
688
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800689
def update_stateful(self, clobber=True):
    """Updates the stateful partition.

    Runs the `stateful_update` script on the DUT, pointing it at the
    `static` counterpart of `self.update_url`.

    @param clobber: If True, a clean stateful installation.

    @raise StatefulUpdateError if the update script fails to
            complete successfully.
    """
    logging.info('Updating stateful partition...')

    # Rewrite only the first 'update' in the URL *path*.  A blanket
    # string replace would also mangle a devserver host name or a
    # build name that happens to contain the word 'update'.
    url_parts = urlparse.urlparse(self.update_url)
    static_path = url_parts.path.replace('update', 'static', 1)
    statefuldev_url = urlparse.urlunparse(
            url_parts._replace(path=static_path))

    # Attempt stateful partition update; this must succeed so that the
    # newly installed host is testable after update.
    statefuldev_cmd = [self._get_stateful_update_script(), statefuldev_url]
    if clobber:
        statefuldev_cmd.append('--stateful_change=clean')

    # Fold stderr into stdout on the DUT side of the command.
    statefuldev_cmd.append('2>&1')
    try:
        self._run(' '.join(statefuldev_cmd), timeout=1200)
    except error.AutoservRunError:
        raise StatefulUpdateError(
                'Failed to perform stateful update on %s' %
                self.host.hostname)
Dale Curtis5c32c722011-05-04 19:24:23 -0700714
Chris Sosaa3ac2152012-05-23 22:23:13 -0700715
def verify_boot_expectations(self, expected_kernel, rollback_message):
    """Verifies that we fully booted given expected kernel state.

    This method both verifies that we booted using the correct kernel
    state and that the OS has marked the kernel as good.

    @param expected_kernel: kernel that we are verifying with,
            i.e. I expect to be booted onto partition 4 etc. See output
            of get_kernel_state.
    @param rollback_message: string to include in the exception message
            text if we booted with the wrong partition.

    @raise NewBuildUpdateError if any of the various checks fail.
    """
    # Figure out the newly active kernel.
    active_kernel = self.get_kernel_state()[0]

    # Check for rollback due to a bad build.
    if active_kernel != expected_kernel:

        # Kernel crash reports should be wiped between test runs, but
        # may persist from earlier parts of the test, or from problems
        # with provisioning.
        #
        # Kernel crash reports will NOT be present if the crash happened
        # before encrypted stateful is mounted.
        #
        # The reports live on the DUT, so list them with a remote
        # command; globbing locally would inspect the filesystem of the
        # machine running this code instead.  `ls` exits non-zero when
        # nothing matches, hence `ignore_status`.
        #
        # TODO(dgarrett): Integrate with server/crashcollect.py at some
        # point.
        crash_listing = self._run(
                'ls /var/spool/crash/kernel.*.kcrash 2>/dev/null',
                ignore_status=True).stdout
        kernel_crashes = crash_listing.split()
        if kernel_crashes:
            rollback_message += ': kernel_crash'
            logging.debug('Found %d kernel crash reports:',
                          len(kernel_crashes))
            # The crash names contain timestamps that may be useful:
            #    kernel.20131207.005945.0.kcrash
            for crash in kernel_crashes:
                logging.debug(' %s', os.path.basename(crash))

        # Print out some information to make it easier to debug
        # the rollback.
        logging.debug('Dumping partition table.')
        self._run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self._run('crossystem --all')
        raise NewBuildUpdateError(self.update_version, rollback_message)

    # Make sure chromeos-setgoodkernel runs: wait until the active
    # kernel has no tries remaining and is flagged successful.
    try:
        utils.poll_for_condition(
                lambda: (self._get_kernel_tries(active_kernel) == 0
                         and self._get_kernel_success(active_kernel)),
                exception=RootFSUpdateError(),
                timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except RootFSUpdateError:
        # Pick the diagnosis from the state of the `system-services`
        # job on the DUT.
        services_status = self._run('status system-services').stdout
        if services_status != 'system-services start/running\n':
            event = 'Chrome failed to reach login screen'
        else:
            event = ('update-engine failed to call '
                     'chromeos-setgoodkernel')
        raise NewBuildUpdateError(self.update_version, event)
Richard Barnette55d1af82018-05-22 23:40:14 +0000778
779
def _prepare_host(self):
    """Get the target DUT into a state where an update can be applied.

    Initially, the target DUT's state is unknown.  The DUT is
    expected to be online, but we strive to be forgiving if Chrome
    and/or the update engine aren't fully functional.
    """
    # The steps below must run in this order:
    #   * Reset the stateful partition (see
    #     `_reset_stateful_partition`).
    #   * Reboot, because it's a good way to clear out problems.
    #   * Touch the PROVISION_FAILED file, to allow repair to detect
    #     failure later.
    #   * Run the hook for host class specific preparation.
    #   * Reset the update engine.  Presumably this covers stopping
    #     Chrome (the system is designed to eventually reboot if
    #     Chrome is stuck in a crash loop) and forcing `update-engine`
    #     to start (its status is uncertain if Chrome failed to start
    #     properly) -- see `_reset_update_engine` to confirm.
    self._reset_stateful_partition()
    dut = self.host
    dut.reboot(timeout=dut.REBOOT_TIMEOUT)
    self._run('touch %s' % PROVISION_FAILED)
    dut.prepare_for_update()
    self._reset_update_engine()
    current_version = dut.get_release_version()
    logging.info('Updating from version %s to %s.',
                 current_version, self.update_version)
805
806
def _verify_devserver(self):
    """Check that our chosen devserver is still working.

    The devserver address is taken from the network location of
    `self.update_url`.

    @raise DevServerError if the devserver fails any sanity check:
            either the health check itself raised, or it completed
            and reported the devserver as unhealthy.
    """
    server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
    # Only the health check call is guarded.  Raising the "not
    # healthy" error inside the `try` would let the broad handler
    # below catch it and replace it with the "not up" message.
    try:
        healthy = dev_server.ImageServer.devserver_healthy(server)
    except Exception:
        logging.exception('Health check of devserver %s failed.', server)
        raise DevServerError(
                server, 'Devserver is not up and available')
    if not healthy:
        raise DevServerError(
                server, 'Devserver is not healthy')
Richard Barnette14ee84c2018-05-18 20:23:42 +0000820
821
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700822 def _install_via_update_engine(self):
823 """Install an updating using the production AU flow.
824
825 This uses the standard AU flow and the `stateful_update` script
826 to download and install a root FS, kernel and stateful
827 filesystem content.
828
829 @return The kernel expected to be booted next.
830 """
831 logging.info('Installing image using update_engine.')
832 expected_kernel = self.update_image()
833 self.update_stateful()
Richard Barnette3ef29a82018-06-28 13:52:54 -0700834 self._set_target_version()
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700835 return expected_kernel
836
837
def _install_via_quick_provision(self):
    """Install an update using the `quick-provision` script.

    This uses the `quick-provision` script to download and install
    a root FS, kernel and stateful filesystem content.

    Quick-provision is attempted only when the image name matches
    the `quick_provision_build_regex` setting in the `CROS` section
    of the global config.  Any failure is logged and cleaned up, and
    `None` is returned so the caller can fall back to another
    install method.

    @return The kernel expected to be booted next, or `None` if
            quick-provision was not eligible, not available, or
            failed.
    """
    build_re = global_config.global_config.get_config_value(
            'CROS', 'quick_provision_build_regex', type=str, default='')
    image_name = url_to_image_name(self.update_url)
    if not build_re or re.match(build_re, image_name) is None:
        logging.info('Not eligible for quick-provision.')
        return None
    logging.info('Installing image using quick-provision.')
    provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
    if not provision_command:
        # `_get_remote_script` can come back empty when the script is
        # unavailable.  Formatting that into the command line would run
        # a bogus "None --noreboot ..." command on the DUT, so bail out
        # before anything has been changed.
        logging.warning('quick-provision script is not available; '
                        'will fall back to update_engine.')
        return None
    server_name = urlparse.urlparse(self.update_url)[1]
    static_url = 'http://%s/static' % server_name
    command = '%s --noreboot %s %s' % (
            provision_command, image_name, static_url)
    try:
        self._run(command)
        self._set_target_version()
        return self._verify_kernel_state()
    except Exception:
        # N.B.  We handle only `Exception` here.  Non-Exception
        # classes (such as KeyboardInterrupt) are handled by our
        # caller.
        logging.exception('quick-provision script failed; '
                          'will fall back to update_engine.')
        # Undo whatever the partial install may have changed.
        self._revert_boot_partition()
        self._reset_stateful_partition()
        self._reset_update_engine()
        return None
872
873
Richard Barnette54d14f52018-05-18 16:39:49 +0000874 def _install_update(self):
Richard Barnette0beb14b2018-05-15 18:07:52 +0000875 """Install the requested image on the DUT, but don't start it.
876
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700877 This downloads and installs a root FS, kernel and stateful
878 filesystem content. This does not reboot the DUT, so the update
879 is merely pending when the method returns.
880
881 @return The kernel expected to be booted next.
Dan Shi0f466e82013-02-22 15:44:58 -0800882 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000883 logging.info('Installing image at %s onto %s',
884 self.update_url, self.host.hostname)
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200885 try:
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700886 return (self._install_via_quick_provision()
887 or self._install_via_update_engine())
Dale Curtis1e973182011-07-12 18:21:36 -0700888 except:
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700889 # N.B. This handling code includes non-Exception classes such
890 # as KeyboardInterrupt. We need to clean up, but we also must
891 # re-raise.
Richard Barnette14ee84c2018-05-18 20:23:42 +0000892 self._revert_boot_partition()
893 self._reset_stateful_partition()
Richard Barnettee86b1ce2018-06-07 10:37:23 -0700894 self._reset_update_engine()
Dale Curtis1e973182011-07-12 18:21:36 -0700895 # Collect update engine logs in the event of failure.
896 if self.host.job:
Aviv Keshet2610d3e2016-06-01 16:37:01 -0700897 logging.info('Collecting update engine logs due to failure...')
Dale Curtis1e973182011-07-12 18:21:36 -0700898 self.host.get_file(
Richard Barnette3e8b2282018-05-15 20:42:20 +0000899 _UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
Gilad Arnold0c0df732015-09-21 06:37:59 -0700900 preserve_perm=False)
Richard Barnette3e8b2282018-05-15 20:42:20 +0000901 _list_image_dir_contents(self.update_url)
Dale Curtis1e973182011-07-12 18:21:36 -0700902 raise
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200903
904
def _complete_update(self, expected_kernel):
    """Reboot into the freshly installed build and confirm it works.

    Initial condition is that the target build has been downloaded
    and installed on the DUT, but has not yet been booted.  This
    function is responsible for rebooting the DUT, and checking that
    the new build is running successfully.

    @param expected_kernel: kernel expected to be active after reboot.
    """
    # Regarding the 'crossystem' command below: In some cases,
    # the update flow puts the TPM into a state such that it
    # fails verification.  We don't know why.  However, this
    # call papers over the problem by clearing the TPM during
    # the reboot.
    #
    # We ignore failures from 'crossystem'.  Although failure
    # here is unexpected, and could signal a bug, the point of
    # the exercise is to paper over problems; allowing this to
    # fail would defeat the purpose.
    self._run('crossystem clear_tpm_owner_request=1',
              ignore_status=True)
    dut = self.host
    dut.reboot(timeout=dut.REBOOT_TIMEOUT)

    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    autoreboot_template = ('FILE="%s" ; [ -f "$FILE" ] || '
                           '( touch "$FILE" ; start autoreboot )')
    self._run(autoreboot_template % _LAB_MACHINE_FILE)

    boot_failure_message = (
            'Build %s failed to boot on %s; system rolled back to previous '
            'build' % (self.update_version, self.host.hostname))
    self.verify_boot_expectations(
            expected_kernel, rollback_message=boot_failure_message)

    logging.debug('Cleaning up old autotest directories.')
    try:
        stale_autodir = autotest.Autotest.get_installed_autodir(self.host)
        self._run('rm -rf ' + stale_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
948
949
Richard Barnette621a8e42018-06-25 17:34:11 -0700950 def _run_update_steps(self):
951 """Perform a full update of a DUT, with diagnosis for failures.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000952
Richard Barnette621a8e42018-06-25 17:34:11 -0700953 Run the individual steps of the update. If a step fails, make
954 sure that the exception raised describes the failure with a
955 diagnosis based on the step that failed.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000956
Richard Barnette621a8e42018-06-25 17:34:11 -0700957 @raise HostUpdateError if a failure is caused by a problem on
Richard Barnette9d43e562018-06-05 17:20:10 +0000958 the DUT prior to the update.
Richard Barnette621a8e42018-06-25 17:34:11 -0700959 @raise ImageInstallError if a failure occurs during download
Richard Barnette9d43e562018-06-05 17:20:10 +0000960 and install of the update and cannot be definitively
961 blamed on either the DUT or the devserver.
Richard Barnette621a8e42018-06-25 17:34:11 -0700962 @raise NewBuildUpdateError if a failure occurs because the
Richard Barnette9d43e562018-06-05 17:20:10 +0000963 new build fails to function correctly.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000964 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000965 self._verify_devserver()
Richard Barnette9d43e562018-06-05 17:20:10 +0000966
967 try:
968 self._prepare_host()
969 except _AttributedUpdateError:
970 raise
971 except Exception as e:
972 logging.exception('Failure preparing host prior to update.')
973 raise HostUpdateError(self.host.hostname, str(e))
974
975 try:
976 expected_kernel = self._install_update()
977 except _AttributedUpdateError:
978 raise
979 except Exception as e:
980 logging.exception('Failure during download and install.')
Richard Barnette621a8e42018-06-25 17:34:11 -0700981 server_name = dev_server.get_resolved_hostname(self.update_url)
Richard Barnette9d43e562018-06-05 17:20:10 +0000982 raise ImageInstallError(self.host.hostname, server_name, str(e))
983
984 try:
985 self._complete_update(expected_kernel)
986 except _AttributedUpdateError:
987 raise
988 except Exception as e:
989 logging.exception('Failure from build after update.')
990 raise NewBuildUpdateError(self.update_version, str(e))
Richard Barnette0beb14b2018-05-15 18:07:52 +0000991
Richard Barnette621a8e42018-06-25 17:34:11 -0700992
def run_update(self):
    """Perform a full update of a DUT in the test lab.

    This downloads and installs the root FS and stateful partition
    content needed for the update specified in `self.host` and
    `self.update_url`.  The update is performed according to the
    requirements for provisioning a DUT for testing the requested
    build.

    An `install` counter is incremented before the update starts.
    When the update steps finish, whether they succeeded or raised,
    metrics are emitted with the failure reason (`None` if no
    exception was caught) and the elapsed time.  Exceptions from the
    update steps are re-raised unchanged.

    @returns A tuple of the form `(image_name, attributes)`, where
            `image_name` is the name of the image installed, and
            `attributes` is new attributes to be applied to the DUT.
    """
    started_at = time.time()
    failure_reason = None
    devserver_name = dev_server.get_resolved_hostname(self.update_url)
    install_counter = metrics.Counter(_metric_name('install'))
    install_counter.increment(fields={'devserver': devserver_name})
    try:
        self._run_update_steps()
    except _AttributedUpdateError as err:
        failure_reason = err.failure_summary
        raise
    except Exception:
        failure_reason = 'Unknown failure'
        raise
    finally:
        finished_at = time.time()
        _emit_updater_metrics(
                self.update_url, self.host.hostname,
                failure_reason, finished_at - started_at)

    image_name = url_to_image_name(self.update_url)
    # update_url is different from devserver url needed to stage autotest
    # packages, therefore, resolve a new devserver url here.
    package_server = dev_server.ImageServer.resolve(
            image_name, self.host.hostname)
    repo_url = tools.get_package_url(package_server.url(), image_name)
    return image_name, {ds_constants.JOB_REPO_URL: repo_url}