blob: 4dbfd499e7fbe0cea6dbf58769f41137a5e158df [file] [log] [blame]
Chris Sosa5e4246b2012-05-22 18:05:22 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Sean O'Connor5346e4e2010-08-12 18:49:24 +02002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Don Garrett56b1cc82013-12-06 17:49:20 -08005import glob
Sean O'Connor5346e4e2010-08-12 18:49:24 +02006import logging
Dale Curtis5c32c722011-05-04 19:24:23 -07007import os
Sean O'Connor5346e4e2010-08-12 18:49:24 +02008import re
Prashanth B32baa9b2014-03-13 13:23:01 -07009import urllib2
Richard Barnette0beb14b2018-05-15 18:07:52 +000010import urlparse
Sean O'Connor5346e4e2010-08-12 18:49:24 +020011
Chris Sosa65425082013-10-16 13:26:22 -070012from autotest_lib.client.bin import utils
Dale Curtis5c32c722011-05-04 19:24:23 -070013from autotest_lib.client.common_lib import error, global_config
Prashanth B32baa9b2014-03-13 13:23:01 -070014from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette0beb14b2018-05-15 18:07:52 +000015from autotest_lib.server import autotest
Shelley Chen61d28982016-10-28 09:40:20 -070016from autotest_lib.server import utils as server_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000017from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
18from autotest_lib.server.cros.dynamic_suite import tools
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -080019from chromite.lib import retry_util
Dan Shif3a35f72016-01-25 11:18:14 -080020
Shelley Chen16b8df32016-10-27 16:24:21 -070021try:
22 from chromite.lib import metrics
Dan Shi5e2efb72017-02-07 11:40:23 -080023except ImportError:
24 metrics = utils.metrics_mock
Sean O'Connor5346e4e2010-08-12 18:49:24 +020025
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070026
Dale Curtis5c32c722011-05-04 19:24:23 -070027# Local stateful update path is relative to the CrOS source directory.
Sean O'Connor5346e4e2010-08-12 18:49:24 +020028UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
Sean Oc053dfe2010-08-23 18:22:26 +020029UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
beeps5e8c45a2013-12-17 22:05:11 -080030# A list of update engine client states that occur after an update is triggered.
31UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
32 'UPDATE_STATUS_UPDATE_AVAILABLE',
33 'UPDATE_STATUS_DOWNLOADING',
34 'UPDATE_STATUS_FINALIZING']
Sean O'Connor5346e4e2010-08-12 18:49:24 +020035
Richard Barnette0beb14b2018-05-15 18:07:52 +000036
Richard Barnette3e8b2282018-05-15 20:42:20 +000037_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
Richard Barnette3e8b2282018-05-15 20:42:20 +000038
39_UPDATER_BIN = '/usr/bin/update_engine_client'
40_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']
41
42_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
43_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
44
45# Time to wait for new kernel to be marked successful after
46# auto update.
47_KERNEL_UPDATE_TIMEOUT = 120
48
49
Richard Barnette0beb14b2018-05-15 18:07:52 +000050# PROVISION_FAILED - A flag file to indicate provision failures. The
51# file is created at the start of any AU procedure (see
Richard Barnette9d43e562018-06-05 17:20:10 +000052# `ChromiumOSUpdater._prepare_host()`). The file's location in
Richard Barnette0beb14b2018-05-15 18:07:52 +000053# stateful means that on successul update it will be removed. Thus, if
54# this file exists, it indicates that we've tried and failed in a
55# previous attempt to update.
56PROVISION_FAILED = '/var/tmp/provision_failed'
57
58
Richard Barnette3e8b2282018-05-15 20:42:20 +000059# A flag file used to enable special handling in lab DUTs. Some
60# parts of the system in Chromium OS test images will behave in ways
61# convenient to the test lab when this file is present. Generally,
62# we create this immediately after any update completes.
63_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
64
65
Richard Barnette9d43e562018-06-05 17:20:10 +000066class RootFSUpdateError(error.TestFail):
Chris Sosa77556d82012-04-05 15:23:14 -070067 """Raised when the RootFS fails to update."""
Chris Sosa77556d82012-04-05 15:23:14 -070068
69
Richard Barnette9d43e562018-06-05 17:20:10 +000070class StatefulUpdateError(error.TestFail):
Chris Sosa77556d82012-04-05 15:23:14 -070071 """Raised when the stateful partition fails to update."""
Chris Sosa77556d82012-04-05 15:23:14 -070072
73
Richard Barnette9d43e562018-06-05 17:20:10 +000074class _AttributedUpdateError(error.TestFail):
75 """Update failure with an attributed cause."""
76
77 def __init__(self, attribution, msg):
78 super(_AttributedUpdateError, self).__init__(
79 '%s: %s' % (attribution, msg))
80
81
82class HostUpdateError(_AttributedUpdateError):
83 """Failure updating a DUT attributable to the DUT.
84
85 This class of exception should be raised when the most likely cause
86 of failure was a condition existing on the DUT prior to the update,
87 such as a hardware problem, or a bug in the software on the DUT.
88 """
89
90 def __init__(self, hostname, msg):
91 super(HostUpdateError, self).__init__(
92 'Error on %s prior to update' % hostname, msg)
93
94
95class DevServerError(_AttributedUpdateError):
96 """Failure updating a DUT attributable to the devserver.
97
98 This class of exception should be raised when the most likely cause
99 of failure was the devserver serving the target image for update.
100 """
101
102 def __init__(self, devserver, msg):
103 super(DevServerError, self).__init__(
104 'Devserver error on %s' % devserver, msg)
105
106
107class ImageInstallError(_AttributedUpdateError):
108 """Failure updating a DUT when installing from the devserver.
109
110 This class of exception should be raised when the target DUT fails
111 to download and install the target image from the devserver, and
112 either the devserver or the DUT might be at fault.
113 """
114
115 def __init__(self, hostname, devserver, msg):
116 super(ImageInstallError, self).__init__(
117 'Download and install failed from %s onto %s'
118 % (devserver, hostname), msg)
119
120
121class NewBuildUpdateError(_AttributedUpdateError):
122 """Failure updating a DUT attributable to the target build.
123
124 This class of exception should be raised when updating to a new
125 build fails, and the most likely cause of the failure is a bug in
126 the newly installed target build.
127 """
128
129 def __init__(self, update_version, msg):
130 super(NewBuildUpdateError, self).__init__(
131 'Failure in build %s' % update_version, msg)
132
133
Richard Barnette3e8b2282018-05-15 20:42:20 +0000134def _url_to_version(update_url):
Dan Shi0f466e82013-02-22 15:44:58 -0800135 """Return the version based on update_url.
136
137 @param update_url: url to the image to update to.
138
139 """
Dale Curtisddfdb942011-07-14 13:59:24 -0700140 # The Chrome OS version is generally the last element in the URL. The only
141 # exception is delta update URLs, which are rooted under the version; e.g.,
142 # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
143 # strip off the au section of the path before reading the version.
Dan Shi5002cfc2013-04-29 10:45:05 -0700144 return re.sub('/au/.*', '',
145 urlparse.urlparse(update_url).path).split('/')[-1].strip()
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200146
147
Scott Zawalskieadbf702013-03-14 09:23:06 -0400148def url_to_image_name(update_url):
149 """Return the image name based on update_url.
150
151 From a URL like:
152 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
153 return lumpy-release/R27-3837.0.0
154
155 @param update_url: url to the image to update to.
156 @returns a string representing the image name in the update_url.
157
158 """
159 return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])
160
161
Prashanth B32baa9b2014-03-13 13:23:01 -0700162def _get_devserver_build_from_update_url(update_url):
163 """Get the devserver and build from the update url.
164
165 @param update_url: The url for update.
166 Eg: http://devserver:port/update/build.
167
168 @return: A tuple of (devserver url, build) or None if the update_url
169 doesn't match the expected pattern.
170
171 @raises ValueError: If the update_url doesn't match the expected pattern.
172 @raises ValueError: If no global_config was found, or it doesn't contain an
173 image_url_pattern.
174 """
175 pattern = global_config.global_config.get_config_value(
176 'CROS', 'image_url_pattern', type=str, default='')
177 if not pattern:
178 raise ValueError('Cannot parse update_url, the global config needs '
179 'an image_url_pattern.')
180 re_pattern = pattern.replace('%s', '(\S+)')
181 parts = re.search(re_pattern, update_url)
182 if not parts or len(parts.groups()) < 2:
183 raise ValueError('%s is not an update url' % update_url)
184 return parts.groups()
185
186
Richard Barnette3e8b2282018-05-15 20:42:20 +0000187def _list_image_dir_contents(update_url):
Prashanth B32baa9b2014-03-13 13:23:01 -0700188 """Lists the contents of the devserver for a given build/update_url.
189
190 @param update_url: An update url. Eg: http://devserver:port/update/build.
191 """
192 if not update_url:
193 logging.warning('Need update_url to list contents of the devserver.')
194 return
195 error_msg = 'Cannot check contents of devserver, update url %s' % update_url
196 try:
197 devserver_url, build = _get_devserver_build_from_update_url(update_url)
198 except ValueError as e:
199 logging.warning('%s: %s', error_msg, e)
200 return
201 devserver = dev_server.ImageServer(devserver_url)
202 try:
203 devserver.list_image_dir(build)
204 # The devserver will retry on URLError to avoid flaky connections, but will
205 # eventually raise the URLError if it persists. All HTTPErrors get
206 # converted to DevServerExceptions.
207 except (dev_server.DevServerException, urllib2.URLError) as e:
208 logging.warning('%s: %s', error_msg, e)
209
210
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700211# TODO(garnold) This implements shared updater functionality needed for
212# supporting the autoupdate_EndToEnd server-side test. We should probably
213# migrate more of the existing ChromiumOSUpdater functionality to it as we
214# expand non-CrOS support in other tests.
Richard Barnette3e8b2282018-05-15 20:42:20 +0000215class ChromiumOSUpdater(object):
216 """Chromium OS specific DUT update functionality."""
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700217
Richard Barnette3e8b2282018-05-15 20:42:20 +0000218 def __init__(self, update_url, host=None, interactive=True):
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700219 """Initializes the object.
220
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700221 @param update_url: The URL we want the update to use.
222 @param host: A client.common_lib.hosts.Host implementation.
David Haddock76a4c882017-12-13 18:50:09 -0800223 @param interactive: Bool whether we are doing an interactive update.
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700224 """
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700225 self.update_url = update_url
226 self.host = host
David Haddock76a4c882017-12-13 18:50:09 -0800227 self.interactive = interactive
Richard Barnette3e8b2282018-05-15 20:42:20 +0000228 self.update_version = _url_to_version(update_url)
229
230
231 def _run(self, cmd, *args, **kwargs):
232 """Abbreviated form of self.host.run(...)"""
233 return self.host.run(cmd, *args, **kwargs)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700234
235
236 def check_update_status(self):
237 """Returns the current update engine state.
238
239 We use the `update_engine_client -status' command and parse the line
240 indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".
241 """
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800242 update_status = self.host.run(command='%s -status | grep CURRENT_OP' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000243 _UPDATER_BIN)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700244 return update_status.stdout.strip().split('=')[-1]
245
246
Richard Barnette55d1af82018-05-22 23:40:14 +0000247 def _rootdev(self, options=''):
248 """Returns the stripped output of rootdev <options>.
249
250 @param options: options to run rootdev.
251
252 """
253 return self._run('rootdev %s' % options).stdout.strip()
254
255
256 def get_kernel_state(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000257 """Returns the (<active>, <inactive>) kernel state as a pair.
258
259 @raise RootFSUpdateError if the DUT reports a root partition
260 number that isn't one of the known valid values.
261 """
Richard Barnette55d1af82018-05-22 23:40:14 +0000262 active_root = int(re.findall('\d+\Z', self._rootdev('-s'))[0])
263 if active_root == _KERNEL_A['root']:
264 return _KERNEL_A, _KERNEL_B
265 elif active_root == _KERNEL_B['root']:
266 return _KERNEL_B, _KERNEL_A
267 else:
Richard Barnette9d43e562018-06-05 17:20:10 +0000268 raise RootFSUpdateError(
269 'Encountered unknown root partition: %s' % active_root)
Richard Barnette55d1af82018-05-22 23:40:14 +0000270
271
Richard Barnette18fd5842018-05-25 18:21:14 +0000272 def _cgpt(self, flag, kernel):
273 """Return numeric cgpt value for the specified flag, kernel, device."""
274 return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
275 kernel['kernel'], flag)).stdout.strip())
Richard Barnette55d1af82018-05-22 23:40:14 +0000276
277
278 def _get_next_kernel(self):
279 """Return the kernel that has priority for the next boot."""
280 priority_a = self._cgpt('-P', _KERNEL_A)
281 priority_b = self._cgpt('-P', _KERNEL_B)
282 if priority_a > priority_b:
283 return _KERNEL_A
284 else:
285 return _KERNEL_B
286
287
288 def _get_kernel_success(self, kernel):
289 """Return boolean success flag for the specified kernel.
290
291 @param kernel: information of the given kernel, either _KERNEL_A
292 or _KERNEL_B.
293 """
294 return self._cgpt('-S', kernel) != 0
295
296
297 def _get_kernel_tries(self, kernel):
298 """Return tries count for the specified kernel.
299
300 @param kernel: information of the given kernel, either _KERNEL_A
301 or _KERNEL_B.
302 """
303 return self._cgpt('-T', kernel)
304
305
Richard Barnette3e8b2282018-05-15 20:42:20 +0000306 def _get_last_update_error(self):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800307 """Get the last autoupdate error code."""
Richard Barnette3e8b2282018-05-15 20:42:20 +0000308 command_result = self._run(
309 '%s --last_attempt_error' % _UPDATER_BIN)
310 return command_result.stdout.strip().replace('\n', ', ')
Shuqian Zhaod9992722016-02-29 12:26:38 -0800311
312
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800313 def _base_update_handler_no_retry(self, run_args):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800314 """Base function to handle a remote update ssh call.
315
316 @param run_args: Dictionary of args passed to ssh_host.run function.
Shuqian Zhaod9992722016-02-29 12:26:38 -0800317
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800318 @throws: intercepts and re-throws all exceptions
Shuqian Zhaod9992722016-02-29 12:26:38 -0800319 """
Shuqian Zhaod9992722016-02-29 12:26:38 -0800320 try:
321 self.host.run(**run_args)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800322 except Exception as e:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800323 logging.debug('exception in update handler: %s', e)
324 raise e
Shuqian Zhaod9992722016-02-29 12:26:38 -0800325
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800326
327 def _base_update_handler(self, run_args, err_msg_prefix=None):
328 """Handle a remote update ssh call, possibly with retries.
329
330 @param run_args: Dictionary of args passed to ssh_host.run function.
331 @param err_msg_prefix: Prefix of the exception error message.
332 """
333 def exception_handler(e):
334 """Examines exceptions and returns True if the update handler
335 should be retried.
336
337 @param e: the exception intercepted by the retry util.
338 """
339 return (isinstance(e, error.AutoservSSHTimeout) or
340 (isinstance(e, error.GenericHostRunError) and
341 hasattr(e, 'description') and
342 (re.search('ERROR_CODE=37', e.description) or
343 re.search('generic error .255.', e.description))))
344
345 try:
346 # Try the update twice (arg 2 is max_retry, not including the first
347 # call). Some exceptions may be caught by the retry handler.
348 retry_util.GenericRetry(exception_handler, 1,
349 self._base_update_handler_no_retry,
350 run_args)
351 except Exception as e:
352 message = err_msg_prefix + ': ' + str(e)
353 raise RootFSUpdateError(message)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800354
355
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800356 def _wait_for_update_service(self):
357 """Ensure that the update engine daemon is running, possibly
358 by waiting for it a bit in case the DUT just rebooted and the
359 service hasn't started yet.
360 """
361 def handler(e):
362 """Retry exception handler.
363
364 Assumes that the error is due to the update service not having
365 started yet.
366
367 @param e: the exception intercepted by the retry util.
368 """
369 if isinstance(e, error.AutoservRunError):
370 logging.debug('update service check exception: %s\n'
371 'retrying...', e)
372 return True
373 else:
374 return False
375
376 # Retry at most three times, every 5s.
377 status = retry_util.GenericRetry(handler, 3,
378 self.check_update_status,
379 sleep=5)
380
381 # Expect the update engine to be idle.
382 if status != UPDATER_IDLE:
Richard Barnette9d43e562018-06-05 17:20:10 +0000383 raise RootFSUpdateError(
384 'Update engine status is %s (%s was expected).'
385 % (status, UPDATER_IDLE))
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800386
387
Richard Barnette55d1af82018-05-22 23:40:14 +0000388 def _reset_update_engine(self):
389 """Resets the host to prepare for a clean update regardless of state."""
390 self._run('stop ui || true')
391 self._run('stop update-engine || true')
392 self._run('start update-engine')
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700393
Richard Barnette55d1af82018-05-22 23:40:14 +0000394 # Wait for update engine to be ready.
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800395 self._wait_for_update_service()
396
Richard Barnette55d1af82018-05-22 23:40:14 +0000397
398 def _reset_stateful_partition(self):
399 """Clear any pending stateful update request."""
Richard Barnette18fd5842018-05-25 18:21:14 +0000400 self._run('%s --stateful_change=reset 2>&1'
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700401 % self._get_stateful_update_script())
Richard Barnette55d1af82018-05-22 23:40:14 +0000402
403
404 def _revert_boot_partition(self):
405 """Revert the boot partition."""
406 part = self._rootdev('-s')
407 logging.warning('Reverting update; Boot partition will be %s', part)
408 return self._run('/postinst %s 2>&1' % part)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700409
410
Allen Lib5420a72017-06-20 14:14:07 -0700411 def _get_metric_fields(self):
412 """Return a dict of metric fields.
413
414 This is used for sending autoupdate metrics for this instance.
415 """
416 build_name = url_to_image_name(self.update_url)
417 try:
418 board, build_type, milestone, _ = server_utils.ParseBuildName(
419 build_name)
420 except server_utils.ParseBuildNameException:
421 logging.warning('Unable to parse build name %s for metrics. '
422 'Continuing anyway.', build_name)
423 board, build_type, milestone = ('', '', '')
424 return {
425 'dev_server': dev_server.get_hostname(self.update_url),
426 'board': board,
427 'build_type': build_type,
428 'milestone': milestone,
429 }
430
431
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700432 def _verify_update_completed(self):
433 """Verifies that an update has completed.
434
Richard Barnette9d43e562018-06-05 17:20:10 +0000435 @raise RootFSUpdateError if the DUT doesn't indicate that
436 download is complete and the DUT is ready for reboot.
437 @raise RootFSUpdateError if the DUT reports that the partition
438 to be booted next is not the currently inactive
439 partition.
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700440 """
441 status = self.check_update_status()
442 if status != UPDATER_NEED_REBOOT:
Shuqian Zhaod9992722016-02-29 12:26:38 -0800443 error_msg = ''
444 if status == UPDATER_IDLE:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000445 error_msg = 'Update error: %s' % self._get_last_update_error()
Richard Barnette9d43e562018-06-05 17:20:10 +0000446 raise RootFSUpdateError(
447 'Update engine status is %s (%s was expected). %s'
448 % (status, UPDATER_NEED_REBOOT, error_msg))
Richard Barnette4d211c92018-05-24 18:56:08 +0000449 inactive_kernel = self.get_kernel_state()[1]
450 next_kernel = self._get_next_kernel()
451 if next_kernel != inactive_kernel:
Richard Barnette9d43e562018-06-05 17:20:10 +0000452 raise RootFSUpdateError(
Richard Barnette4d211c92018-05-24 18:56:08 +0000453 'Update failed. The kernel for next boot is %s, '
Richard Barnette9d43e562018-06-05 17:20:10 +0000454 'but %s was expected.'
455 % (next_kernel['name'], inactive_kernel['name']))
Richard Barnette4d211c92018-05-24 18:56:08 +0000456 return inactive_kernel
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700457
458
Richard Barnette55d1af82018-05-22 23:40:14 +0000459 def trigger_update(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000460 """Triggers a background update."""
461 # If this function is called immediately after reboot (which it
462 # can be), there is no guarantee that the update engine is up
463 # and running yet, so wait for it.
Richard Barnette55d1af82018-05-22 23:40:14 +0000464 self._wait_for_update_service()
465
466 autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
467 (_UPDATER_BIN, self.update_url))
468 run_args = {'command': autoupdate_cmd}
469 err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
470 logging.info('Triggering update via: %s', autoupdate_cmd)
471 metric_fields = {'success': False}
472 try:
473 self._base_update_handler(run_args, err_prefix)
474 metric_fields['success'] = True
475 finally:
476 c = metrics.Counter('chromeos/autotest/autoupdater/trigger')
477 metric_fields.update(self._get_metric_fields())
478 c.increment(fields=metric_fields)
479
480
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700481 def update_image(self):
Richard Barnette18fd5842018-05-25 18:21:14 +0000482 """Updates the device root FS and kernel and verifies success."""
Shuqian Zhaofe4d62e2016-06-23 14:46:45 -0700483 autoupdate_cmd = ('%s --update --omaha_url=%s' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000484 (_UPDATER_BIN, self.update_url))
David Haddock76a4c882017-12-13 18:50:09 -0800485 if not self.interactive:
486 autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
Shuqian Zhaod9992722016-02-29 12:26:38 -0800487 run_args = {'command': autoupdate_cmd, 'timeout': 3600}
488 err_prefix = ('Failed to install device image using payload at %s '
489 'on %s. ' % (self.update_url, self.host.hostname))
490 logging.info('Updating image via: %s', autoupdate_cmd)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700491 metric_fields = {'success': False}
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800492 try:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800493 self._base_update_handler(run_args, err_prefix)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700494 metric_fields['success'] = True
495 finally:
Allen Li1a5cc0a2017-06-20 14:08:59 -0700496 c = metrics.Counter('chromeos/autotest/autoupdater/update')
Allen Lib5420a72017-06-20 14:14:07 -0700497 metric_fields.update(self._get_metric_fields())
Allen Li1a5cc0a2017-06-20 14:08:59 -0700498 c.increment(fields=metric_fields)
Richard Barnette4d211c92018-05-24 18:56:08 +0000499 return self._verify_update_completed()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700500
501
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700502 def _get_remote_script(self, script_name):
503 """Ensure that `script_name` is present on the DUT.
Chris Sosa5e4246b2012-05-22 18:05:22 -0700504
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700505 The given script (e.g. `stateful_update`) may be present in the
506 stateful partition under /usr/local/bin, or we may have to
507 download it from the devserver.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700508
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700509 Determine whether the script is present or must be downloaded
510 and download if necessary. Then, return a command fragment
511 sufficient to run the script from whereever it now lives on the
512 DUT.
Richard Barnette9d43e562018-06-05 17:20:10 +0000513
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700514 @param script_name The name of the script as expected in
515 /usr/local/bin and on the devserver.
516 @return A string with the command (minus arguments) that will
517 run the target script.
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700518 """
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700519 remote_script = '/usr/local/bin/%s' % script_name
520 if self.host.path_exists(remote_script):
521 return remote_script
522 remote_tmp_script = '/tmp/%s' % script_name
523 server_name = urlparse.urlparse(self.update_url)[1]
524 script_url = 'http://%s/static/%s' % (server_name, script_name)
525 fetch_script = (
526 'curl -o %s %s && head -1 %s | grep "^#!" | sed "s/#!//"') % (
527 remote_tmp_script, script_url, remote_tmp_script)
528 script_interpreter = self._run(fetch_script,
529 ignore_status=True).stdout.strip()
530 if not script_interpreter:
531 return None
532 return '%s %s' % (script_interpreter, remote_tmp_script)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700533
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700534
535 def _get_stateful_update_script(self):
536 """Returns a command to run the stateful update script.
537
538 Find `stateful_update` on the target or install it, as
539 necessary. If installation fails, raise an exception.
540
541 @raise StatefulUpdateError if the script can't be found or
542 installed.
543 @return A string that can be joined with arguments to run the
544 `stateful_update` command on the DUT.
545 """
546 script_command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
547 if not script_command:
548 raise StatefulUpdateError('Could not install %s on DUT'
Richard Barnette9d43e562018-06-05 17:20:10 +0000549 % _STATEFUL_UPDATE_SCRIPT)
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700550 return script_command
Chris Sosa5e4246b2012-05-22 18:05:22 -0700551
552
Chris Sosac1932172013-10-16 13:28:53 -0700553 def rollback_rootfs(self, powerwash):
554 """Triggers rollback and waits for it to complete.
555
556 @param powerwash: If true, powerwash as part of rollback.
557
558 @raise RootFSUpdateError if anything went wrong.
Chris Sosac1932172013-10-16 13:28:53 -0700559 """
Dan Shi549fb822015-03-24 18:01:11 -0700560 version = self.host.get_release_version()
Chris Sosac8617522014-06-09 23:22:26 +0000561 # Introduced can_rollback in M36 (build 5772). # etc/lsb-release matches
562 # X.Y.Z. This version split just pulls the first part out.
563 try:
564 build_number = int(version.split('.')[0])
565 except ValueError:
566 logging.error('Could not parse build number.')
567 build_number = 0
568
569 if build_number >= 5772:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000570 can_rollback_cmd = '%s --can_rollback' % _UPDATER_BIN
Chris Sosac8617522014-06-09 23:22:26 +0000571 logging.info('Checking for rollback.')
572 try:
573 self._run(can_rollback_cmd)
574 except error.AutoservRunError as e:
575 raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
576 (self.host.hostname, str(e)))
577
Richard Barnette3e8b2282018-05-15 20:42:20 +0000578 rollback_cmd = '%s --rollback --follow' % _UPDATER_BIN
Chris Sosac1932172013-10-16 13:28:53 -0700579 if not powerwash:
Dan Shif3a35f72016-01-25 11:18:14 -0800580 rollback_cmd += ' --nopowerwash'
Chris Sosac1932172013-10-16 13:28:53 -0700581
Chris Sosac8617522014-06-09 23:22:26 +0000582 logging.info('Performing rollback.')
Chris Sosac1932172013-10-16 13:28:53 -0700583 try:
584 self._run(rollback_cmd)
Chris Sosac1932172013-10-16 13:28:53 -0700585 except error.AutoservRunError as e:
586 raise RootFSUpdateError('Rollback failed on %s: %s' %
587 (self.host.hostname, str(e)))
588
589 self._verify_update_completed()
590
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800591
Chris Sosa72312602013-04-16 15:01:56 -0700592 def update_stateful(self, clobber=True):
593 """Updates the stateful partition.
594
595 @param clobber: If True, a clean stateful installation.
Richard Barnette9d43e562018-06-05 17:20:10 +0000596
597 @raise StatefulUpdateError if the update script fails to
598 complete successfully.
Chris Sosa72312602013-04-16 15:01:56 -0700599 """
Chris Sosa77556d82012-04-05 15:23:14 -0700600 logging.info('Updating stateful partition...')
Richard Barnette18fd5842018-05-25 18:21:14 +0000601 statefuldev_url = self.update_url.replace('update', 'static')
Chris Sosaa3ac2152012-05-23 22:23:13 -0700602
Dale Curtis5c32c722011-05-04 19:24:23 -0700603 # Attempt stateful partition update; this must succeed so that the newly
604 # installed host is testable after update.
Richard Barnettef00a2ee2018-06-08 11:51:38 -0700605 statefuldev_cmd = [self._get_stateful_update_script(), statefuldev_url]
Chris Sosa72312602013-04-16 15:01:56 -0700606 if clobber:
607 statefuldev_cmd.append('--stateful_change=clean')
608
609 statefuldev_cmd.append('2>&1')
Dale Curtis5c32c722011-05-04 19:24:23 -0700610 try:
Dan Shi205b8732016-01-25 10:56:22 -0800611 self._run(' '.join(statefuldev_cmd), timeout=1200)
Dale Curtis5c32c722011-05-04 19:24:23 -0700612 except error.AutoservRunError:
Richard Barnette18fd5842018-05-25 18:21:14 +0000613 raise StatefulUpdateError(
Gilad Arnold62cf3a42015-10-01 09:15:25 -0700614 'Failed to perform stateful update on %s' %
615 self.host.hostname)
Dale Curtis5c32c722011-05-04 19:24:23 -0700616
Chris Sosaa3ac2152012-05-23 22:23:13 -0700617
Richard Barnette54d14f52018-05-18 16:39:49 +0000618 def verify_boot_expectations(self, expected_kernel, rollback_message):
Richard Barnette55d1af82018-05-22 23:40:14 +0000619 """Verifies that we fully booted given expected kernel state.
620
621 This method both verifies that we booted using the correct kernel
622 state and that the OS has marked the kernel as good.
623
Richard Barnette54d14f52018-05-18 16:39:49 +0000624 @param expected_kernel: kernel that we are verifying with,
Richard Barnette55d1af82018-05-22 23:40:14 +0000625 i.e. I expect to be booted onto partition 4 etc. See output of
626 get_kernel_state.
Richard Barnette9d43e562018-06-05 17:20:10 +0000627 @param rollback_message: string include in except message text
Richard Barnette55d1af82018-05-22 23:40:14 +0000628 if we booted with the wrong partition.
629
Richard Barnette9d43e562018-06-05 17:20:10 +0000630 @raise NewBuildUpdateError if any of the various checks fail.
Richard Barnette55d1af82018-05-22 23:40:14 +0000631 """
632 # Figure out the newly active kernel.
Richard Barnette54d14f52018-05-18 16:39:49 +0000633 active_kernel = self.get_kernel_state()[0]
Richard Barnette55d1af82018-05-22 23:40:14 +0000634
635 # Check for rollback due to a bad build.
Richard Barnette54d14f52018-05-18 16:39:49 +0000636 if active_kernel != expected_kernel:
Richard Barnette55d1af82018-05-22 23:40:14 +0000637
638 # Kernel crash reports should be wiped between test runs, but
639 # may persist from earlier parts of the test, or from problems
640 # with provisioning.
641 #
642 # Kernel crash reports will NOT be present if the crash happened
643 # before encrypted stateful is mounted.
644 #
645 # TODO(dgarrett): Integrate with server/crashcollect.py at some
646 # point.
647 kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
648 if kernel_crashes:
649 rollback_message += ': kernel_crash'
650 logging.debug('Found %d kernel crash reports:',
651 len(kernel_crashes))
652 # The crash names contain timestamps that may be useful:
653 # kernel.20131207.005945.0.kcrash
654 for crash in kernel_crashes:
655 logging.debug(' %s', os.path.basename(crash))
656
657 # Print out some information to make it easier to debug
658 # the rollback.
659 logging.debug('Dumping partition table.')
660 self._run('cgpt show $(rootdev -s -d)')
661 logging.debug('Dumping crossystem for firmware debugging.')
662 self._run('crossystem --all')
Richard Barnette9d43e562018-06-05 17:20:10 +0000663 raise NewBuildUpdateError(self.update_version, rollback_message)
Richard Barnette55d1af82018-05-22 23:40:14 +0000664
665 # Make sure chromeos-setgoodkernel runs.
666 try:
667 utils.poll_for_condition(
Richard Barnette54d14f52018-05-18 16:39:49 +0000668 lambda: (self._get_kernel_tries(active_kernel) == 0
669 and self._get_kernel_success(active_kernel)),
Richard Barnette9d43e562018-06-05 17:20:10 +0000670 exception=RootFSUpdateError(),
Richard Barnette55d1af82018-05-22 23:40:14 +0000671 timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
Richard Barnette9d43e562018-06-05 17:20:10 +0000672 except RootFSUpdateError:
Richard Barnette55d1af82018-05-22 23:40:14 +0000673 services_status = self._run('status system-services').stdout
674 if services_status != 'system-services start/running\n':
675 event = ('Chrome failed to reach login screen')
676 else:
677 event = ('update-engine failed to call '
678 'chromeos-setgoodkernel')
Richard Barnette9d43e562018-06-05 17:20:10 +0000679 raise NewBuildUpdateError(self.update_version, event)
Richard Barnette55d1af82018-05-22 23:40:14 +0000680
681
Richard Barnette14ee84c2018-05-18 20:23:42 +0000682 def _prepare_host(self):
683 """Make sure the target DUT is working and ready for update.
684
685 Initially, the target DUT's state is unknown. The DUT is
686 expected to be online, but we strive to be forgiving if Chrome
687 and/or the update engine aren't fully functional.
688 """
689 # Summary of work, and the rationale:
690 # 1. Reboot, because it's a good way to clear out problems.
691 # 2. Touch the PROVISION_FAILED file, to allow repair to detect
692 # failure later.
693 # 3. Run the hook for host class specific preparation.
694 # 4. Stop Chrome, because the system is designed to eventually
695 # reboot if Chrome is stuck in a crash loop.
696 # 5. Force `update-engine` to start, because if Chrome failed
697 # to start properly, the status of the `update-engine` job
698 # will be uncertain.
699 self._reset_stateful_partition()
700 self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
701 self._run('touch %s' % PROVISION_FAILED)
702 self.host.prepare_for_update()
703 self._run('stop ui || true')
704 self._run('start update-engine || true')
705 self._wait_for_update_service()
706 logging.info('Updating from version %s to %s.',
707 self.host.get_release_version(),
708 self.update_version)
709
710
711 def _verify_devserver(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000712 """Check that our chosen devserver is still working.
713
714 @raise DevServerError if the devserver fails any sanity check.
715 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000716 server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
717 try:
718 if not dev_server.ImageServer.devserver_healthy(server):
Richard Barnette9d43e562018-06-05 17:20:10 +0000719 raise DevServerError(
720 server, 'Devserver is not healthy')
Richard Barnette14ee84c2018-05-18 20:23:42 +0000721 except Exception as e:
Richard Barnette9d43e562018-06-05 17:20:10 +0000722 raise DevServerError(
723 server, 'Devserver is not up and available')
Richard Barnette14ee84c2018-05-18 20:23:42 +0000724
725
Richard Barnette54d14f52018-05-18 16:39:49 +0000726 def _install_update(self):
Richard Barnette0beb14b2018-05-15 18:07:52 +0000727 """Install the requested image on the DUT, but don't start it.
728
729 This downloads all content needed for the requested update, and
730 installs it in place on the DUT. This does not reboot the DUT,
Richard Barnette14ee84c2018-05-18 20:23:42 +0000731 so the update is merely pending when the method returns.
Dan Shi0f466e82013-02-22 15:44:58 -0800732 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000733 logging.info('Installing image at %s onto %s',
734 self.update_url, self.host.hostname)
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200735 try:
Richard Barnette14ee84c2018-05-18 20:23:42 +0000736 expected_kernel = self.update_image()
737 self.update_stateful()
Dale Curtis1e973182011-07-12 18:21:36 -0700738 logging.info('Update complete.')
Dale Curtis1e973182011-07-12 18:21:36 -0700739 except:
Richard Barnette14ee84c2018-05-18 20:23:42 +0000740 self._revert_boot_partition()
741 self._reset_stateful_partition()
Dale Curtis1e973182011-07-12 18:21:36 -0700742 # Collect update engine logs in the event of failure.
743 if self.host.job:
Aviv Keshet2610d3e2016-06-01 16:37:01 -0700744 logging.info('Collecting update engine logs due to failure...')
Dale Curtis1e973182011-07-12 18:21:36 -0700745 self.host.get_file(
Richard Barnette3e8b2282018-05-15 20:42:20 +0000746 _UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
Gilad Arnold0c0df732015-09-21 06:37:59 -0700747 preserve_perm=False)
Richard Barnette3e8b2282018-05-15 20:42:20 +0000748 _list_image_dir_contents(self.update_url)
Dale Curtis1e973182011-07-12 18:21:36 -0700749 raise
Richard Barnette4d211c92018-05-24 18:56:08 +0000750 return expected_kernel
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200751
752
Richard Barnette14ee84c2018-05-18 20:23:42 +0000753 def _complete_update(self, expected_kernel):
754 """Finish the update, and confirm that it succeeded.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000755
Richard Barnette14ee84c2018-05-18 20:23:42 +0000756 Initial condition is that the target build has been downloaded
757 and installed on the DUT, but has not yet been booted. This
758 function is responsible for rebooting the DUT, and checking that
759 the new build is running successfully.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000760
Richard Barnette14ee84c2018-05-18 20:23:42 +0000761 @param expected_kernel: kernel expected to be active after reboot.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000762 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000763 # Regarding the 'crossystem' command below: In some cases,
764 # the update flow puts the TPM into a state such that it
765 # fails verification. We don't know why. However, this
766 # call papers over the problem by clearing the TPM during
767 # the reboot.
768 #
769 # We ignore failures from 'crossystem'. Although failure
770 # here is unexpected, and could signal a bug, the point of
771 # the exercise is to paper over problems; allowing this to
772 # fail would defeat the purpose.
773 self._run('crossystem clear_tpm_owner_request=1',
774 ignore_status=True)
775 self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
776
Richard Barnette0beb14b2018-05-15 18:07:52 +0000777 # Touch the lab machine file to leave a marker that
778 # distinguishes this image from other test images.
779 # Afterwards, we must re-run the autoreboot script because
780 # it depends on the _LAB_MACHINE_FILE.
781 autoreboot_cmd = ('FILE="%s" ; [ -f "$FILE" ] || '
782 '( touch "$FILE" ; start autoreboot )')
Richard Barnette3e8b2282018-05-15 20:42:20 +0000783 self._run(autoreboot_cmd % _LAB_MACHINE_FILE)
Richard Barnette0beb14b2018-05-15 18:07:52 +0000784 self.verify_boot_expectations(
785 expected_kernel, rollback_message=
786 'Build %s failed to boot on %s; system rolled back to previous '
787 'build' % (self.update_version, self.host.hostname))
788
789 logging.debug('Cleaning up old autotest directories.')
790 try:
791 installed_autodir = autotest.Autotest.get_installed_autodir(
792 self.host)
793 self._run('rm -rf ' + installed_autodir)
794 except autotest.AutodirNotFoundError:
795 logging.debug('No autotest installed directory found.')
796
797
Richard Barnette54d14f52018-05-18 16:39:49 +0000798 def run_update(self):
Richard Barnette0beb14b2018-05-15 18:07:52 +0000799 """Perform a full update of a DUT in the test lab.
800
801 This downloads and installs the root FS and stateful partition
802 content needed for the update specified in `self.host` and
803 `self.update_url`. The update is performed according to the
804 requirements for provisioning a DUT for testing the requested
805 build.
806
Richard Barnette0beb14b2018-05-15 18:07:52 +0000807 @returns A tuple of the form `(image_name, attributes)`, where
808 `image_name` is the name of the image installed, and
809 `attributes` is new attributes to be applied to the DUT.
Richard Barnette9d43e562018-06-05 17:20:10 +0000810 @raise HostUpdateError if failure is caused by a problem on
811 the DUT prior to the update.
812 @raise ImageInstallError if the failure occurs during download
813 and install of the update and cannot be definitively
814 blamed on either the DUT or the devserver.
815 @raise NewBuildUpdateError if the failure occurs because the
816 new build fails to function correctly.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000817 """
Richard Barnette0beb14b2018-05-15 18:07:52 +0000818 server_name = dev_server.get_hostname(self.update_url)
819 (metrics.Counter('chromeos/autotest/provision/install')
820 .increment(fields={'devserver': server_name}))
821
Richard Barnette14ee84c2018-05-18 20:23:42 +0000822 self._verify_devserver()
Richard Barnette9d43e562018-06-05 17:20:10 +0000823
824 try:
825 self._prepare_host()
826 except _AttributedUpdateError:
827 raise
828 except Exception as e:
829 logging.exception('Failure preparing host prior to update.')
830 raise HostUpdateError(self.host.hostname, str(e))
831
832 try:
833 expected_kernel = self._install_update()
834 except _AttributedUpdateError:
835 raise
836 except Exception as e:
837 logging.exception('Failure during download and install.')
838 raise ImageInstallError(self.host.hostname, server_name, str(e))
839
840 try:
841 self._complete_update(expected_kernel)
842 except _AttributedUpdateError:
843 raise
844 except Exception as e:
845 logging.exception('Failure from build after update.')
846 raise NewBuildUpdateError(self.update_version, str(e))
Richard Barnette0beb14b2018-05-15 18:07:52 +0000847
Richard Barnette0beb14b2018-05-15 18:07:52 +0000848 image_name = url_to_image_name(self.update_url)
849 # update_url is different from devserver url needed to stage autotest
850 # packages, therefore, resolve a new devserver url here.
851 devserver_url = dev_server.ImageServer.resolve(
852 image_name, self.host.hostname).url()
853 repo_url = tools.get_package_url(devserver_url, image_name)
854 return image_name, {ds_constants.JOB_REPO_URL: repo_url}