blob: feff8d9ecc2d5bd03efa9237a1a1660ce7dd48a5 [file] [log] [blame]
Chris Sosa5e4246b2012-05-22 18:05:22 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Sean O'Connor5346e4e2010-08-12 18:49:24 +02002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Don Garrett56b1cc82013-12-06 17:49:20 -08005import glob
Sean O'Connor5346e4e2010-08-12 18:49:24 +02006import logging
Dale Curtis5c32c722011-05-04 19:24:23 -07007import os
Sean O'Connor5346e4e2010-08-12 18:49:24 +02008import re
Prashanth B32baa9b2014-03-13 13:23:01 -07009import urllib2
Richard Barnette0beb14b2018-05-15 18:07:52 +000010import urlparse
Sean O'Connor5346e4e2010-08-12 18:49:24 +020011
Chris Sosa65425082013-10-16 13:26:22 -070012from autotest_lib.client.bin import utils
Dale Curtis5c32c722011-05-04 19:24:23 -070013from autotest_lib.client.common_lib import error, global_config
Prashanth B32baa9b2014-03-13 13:23:01 -070014from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette0beb14b2018-05-15 18:07:52 +000015from autotest_lib.server import autotest
Shelley Chen61d28982016-10-28 09:40:20 -070016from autotest_lib.server import utils as server_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000017from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
18from autotest_lib.server.cros.dynamic_suite import tools
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -080019from chromite.lib import retry_util
Dan Shif3a35f72016-01-25 11:18:14 -080020
Shelley Chen16b8df32016-10-27 16:24:21 -070021try:
22 from chromite.lib import metrics
Dan Shi5e2efb72017-02-07 11:40:23 -080023except ImportError:
24 metrics = utils.metrics_mock
Sean O'Connor5346e4e2010-08-12 18:49:24 +020025
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070026try:
27 import devserver
Richard Barnette3e8b2282018-05-15 20:42:20 +000028 _STATEFUL_UPDATE_PATH = devserver.__path__[0]
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070029except ImportError:
Richard Barnette3e8b2282018-05-15 20:42:20 +000030 _STATEFUL_UPDATE_PATH = '/usr/bin'
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070031
Dale Curtis5c32c722011-05-04 19:24:23 -070032# Local stateful update path is relative to the CrOS source directory.
Sean O'Connor5346e4e2010-08-12 18:49:24 +020033UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
Sean Oc053dfe2010-08-23 18:22:26 +020034UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
beeps5e8c45a2013-12-17 22:05:11 -080035# A list of update engine client states that occur after an update is triggered.
36UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
37 'UPDATE_STATUS_UPDATE_AVAILABLE',
38 'UPDATE_STATUS_DOWNLOADING',
39 'UPDATE_STATUS_FINALIZING']
Sean O'Connor5346e4e2010-08-12 18:49:24 +020040
Richard Barnette0beb14b2018-05-15 18:07:52 +000041
Richard Barnette3e8b2282018-05-15 20:42:20 +000042_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
43_REMOTE_STATEFUL_UPDATE_PATH = os.path.join(
44 '/usr/local/bin', _STATEFUL_UPDATE_SCRIPT)
45_REMOTE_TMP_STATEFUL_UPDATE = os.path.join(
46 '/tmp', _STATEFUL_UPDATE_SCRIPT)
47
48_UPDATER_BIN = '/usr/bin/update_engine_client'
49_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']
50
51_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
52_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
53
54# Time to wait for new kernel to be marked successful after
55# auto update.
56_KERNEL_UPDATE_TIMEOUT = 120
57
58
Richard Barnette0beb14b2018-05-15 18:07:52 +000059# PROVISION_FAILED - A flag file to indicate provision failures. The
60# file is created at the start of any AU procedure (see
Richard Barnette9d43e562018-06-05 17:20:10 +000061# `ChromiumOSUpdater._prepare_host()`). The file's location in
Richard Barnette0beb14b2018-05-15 18:07:52 +000062# stateful means that on successul update it will be removed. Thus, if
63# this file exists, it indicates that we've tried and failed in a
64# previous attempt to update.
65PROVISION_FAILED = '/var/tmp/provision_failed'
66
67
Richard Barnette3e8b2282018-05-15 20:42:20 +000068# A flag file used to enable special handling in lab DUTs. Some
69# parts of the system in Chromium OS test images will behave in ways
70# convenient to the test lab when this file is present. Generally,
71# we create this immediately after any update completes.
72_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
73
74
Richard Barnette9d43e562018-06-05 17:20:10 +000075class RootFSUpdateError(error.TestFail):
Chris Sosa77556d82012-04-05 15:23:14 -070076 """Raised when the RootFS fails to update."""
Chris Sosa77556d82012-04-05 15:23:14 -070077
78
Richard Barnette9d43e562018-06-05 17:20:10 +000079class StatefulUpdateError(error.TestFail):
Chris Sosa77556d82012-04-05 15:23:14 -070080 """Raised when the stateful partition fails to update."""
Chris Sosa77556d82012-04-05 15:23:14 -070081
82
Richard Barnette9d43e562018-06-05 17:20:10 +000083class _AttributedUpdateError(error.TestFail):
84 """Update failure with an attributed cause."""
85
86 def __init__(self, attribution, msg):
87 super(_AttributedUpdateError, self).__init__(
88 '%s: %s' % (attribution, msg))
89
90
91class HostUpdateError(_AttributedUpdateError):
92 """Failure updating a DUT attributable to the DUT.
93
94 This class of exception should be raised when the most likely cause
95 of failure was a condition existing on the DUT prior to the update,
96 such as a hardware problem, or a bug in the software on the DUT.
97 """
98
99 def __init__(self, hostname, msg):
100 super(HostUpdateError, self).__init__(
101 'Error on %s prior to update' % hostname, msg)
102
103
104class DevServerError(_AttributedUpdateError):
105 """Failure updating a DUT attributable to the devserver.
106
107 This class of exception should be raised when the most likely cause
108 of failure was the devserver serving the target image for update.
109 """
110
111 def __init__(self, devserver, msg):
112 super(DevServerError, self).__init__(
113 'Devserver error on %s' % devserver, msg)
114
115
116class ImageInstallError(_AttributedUpdateError):
117 """Failure updating a DUT when installing from the devserver.
118
119 This class of exception should be raised when the target DUT fails
120 to download and install the target image from the devserver, and
121 either the devserver or the DUT might be at fault.
122 """
123
124 def __init__(self, hostname, devserver, msg):
125 super(ImageInstallError, self).__init__(
126 'Download and install failed from %s onto %s'
127 % (devserver, hostname), msg)
128
129
130class NewBuildUpdateError(_AttributedUpdateError):
131 """Failure updating a DUT attributable to the target build.
132
133 This class of exception should be raised when updating to a new
134 build fails, and the most likely cause of the failure is a bug in
135 the newly installed target build.
136 """
137
138 def __init__(self, update_version, msg):
139 super(NewBuildUpdateError, self).__init__(
140 'Failure in build %s' % update_version, msg)
141
142
Richard Barnette3e8b2282018-05-15 20:42:20 +0000143def _url_to_version(update_url):
Dan Shi0f466e82013-02-22 15:44:58 -0800144 """Return the version based on update_url.
145
146 @param update_url: url to the image to update to.
147
148 """
Dale Curtisddfdb942011-07-14 13:59:24 -0700149 # The Chrome OS version is generally the last element in the URL. The only
150 # exception is delta update URLs, which are rooted under the version; e.g.,
151 # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
152 # strip off the au section of the path before reading the version.
Dan Shi5002cfc2013-04-29 10:45:05 -0700153 return re.sub('/au/.*', '',
154 urlparse.urlparse(update_url).path).split('/')[-1].strip()
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200155
156
Scott Zawalskieadbf702013-03-14 09:23:06 -0400157def url_to_image_name(update_url):
158 """Return the image name based on update_url.
159
160 From a URL like:
161 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
162 return lumpy-release/R27-3837.0.0
163
164 @param update_url: url to the image to update to.
165 @returns a string representing the image name in the update_url.
166
167 """
168 return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])
169
170
Prashanth B32baa9b2014-03-13 13:23:01 -0700171def _get_devserver_build_from_update_url(update_url):
172 """Get the devserver and build from the update url.
173
174 @param update_url: The url for update.
175 Eg: http://devserver:port/update/build.
176
177 @return: A tuple of (devserver url, build) or None if the update_url
178 doesn't match the expected pattern.
179
180 @raises ValueError: If the update_url doesn't match the expected pattern.
181 @raises ValueError: If no global_config was found, or it doesn't contain an
182 image_url_pattern.
183 """
184 pattern = global_config.global_config.get_config_value(
185 'CROS', 'image_url_pattern', type=str, default='')
186 if not pattern:
187 raise ValueError('Cannot parse update_url, the global config needs '
188 'an image_url_pattern.')
189 re_pattern = pattern.replace('%s', '(\S+)')
190 parts = re.search(re_pattern, update_url)
191 if not parts or len(parts.groups()) < 2:
192 raise ValueError('%s is not an update url' % update_url)
193 return parts.groups()
194
195
Richard Barnette3e8b2282018-05-15 20:42:20 +0000196def _list_image_dir_contents(update_url):
Prashanth B32baa9b2014-03-13 13:23:01 -0700197 """Lists the contents of the devserver for a given build/update_url.
198
199 @param update_url: An update url. Eg: http://devserver:port/update/build.
200 """
201 if not update_url:
202 logging.warning('Need update_url to list contents of the devserver.')
203 return
204 error_msg = 'Cannot check contents of devserver, update url %s' % update_url
205 try:
206 devserver_url, build = _get_devserver_build_from_update_url(update_url)
207 except ValueError as e:
208 logging.warning('%s: %s', error_msg, e)
209 return
210 devserver = dev_server.ImageServer(devserver_url)
211 try:
212 devserver.list_image_dir(build)
213 # The devserver will retry on URLError to avoid flaky connections, but will
214 # eventually raise the URLError if it persists. All HTTPErrors get
215 # converted to DevServerExceptions.
216 except (dev_server.DevServerException, urllib2.URLError) as e:
217 logging.warning('%s: %s', error_msg, e)
218
219
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700220# TODO(garnold) This implements shared updater functionality needed for
221# supporting the autoupdate_EndToEnd server-side test. We should probably
222# migrate more of the existing ChromiumOSUpdater functionality to it as we
223# expand non-CrOS support in other tests.
Richard Barnette3e8b2282018-05-15 20:42:20 +0000224class ChromiumOSUpdater(object):
225 """Chromium OS specific DUT update functionality."""
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700226
Richard Barnette3e8b2282018-05-15 20:42:20 +0000227 def __init__(self, update_url, host=None, interactive=True):
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700228 """Initializes the object.
229
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700230 @param update_url: The URL we want the update to use.
231 @param host: A client.common_lib.hosts.Host implementation.
David Haddock76a4c882017-12-13 18:50:09 -0800232 @param interactive: Bool whether we are doing an interactive update.
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700233 """
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700234 self.update_url = update_url
235 self.host = host
David Haddock76a4c882017-12-13 18:50:09 -0800236 self.interactive = interactive
Richard Barnette3e8b2282018-05-15 20:42:20 +0000237 self.update_version = _url_to_version(update_url)
238
239
240 def _run(self, cmd, *args, **kwargs):
241 """Abbreviated form of self.host.run(...)"""
242 return self.host.run(cmd, *args, **kwargs)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700243
244
245 def check_update_status(self):
246 """Returns the current update engine state.
247
248 We use the `update_engine_client -status' command and parse the line
249 indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".
250 """
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800251 update_status = self.host.run(command='%s -status | grep CURRENT_OP' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000252 _UPDATER_BIN)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700253 return update_status.stdout.strip().split('=')[-1]
254
255
Richard Barnette55d1af82018-05-22 23:40:14 +0000256 def _rootdev(self, options=''):
257 """Returns the stripped output of rootdev <options>.
258
259 @param options: options to run rootdev.
260
261 """
262 return self._run('rootdev %s' % options).stdout.strip()
263
264
265 def get_kernel_state(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000266 """Returns the (<active>, <inactive>) kernel state as a pair.
267
268 @raise RootFSUpdateError if the DUT reports a root partition
269 number that isn't one of the known valid values.
270 """
Richard Barnette55d1af82018-05-22 23:40:14 +0000271 active_root = int(re.findall('\d+\Z', self._rootdev('-s'))[0])
272 if active_root == _KERNEL_A['root']:
273 return _KERNEL_A, _KERNEL_B
274 elif active_root == _KERNEL_B['root']:
275 return _KERNEL_B, _KERNEL_A
276 else:
Richard Barnette9d43e562018-06-05 17:20:10 +0000277 raise RootFSUpdateError(
278 'Encountered unknown root partition: %s' % active_root)
Richard Barnette55d1af82018-05-22 23:40:14 +0000279
280
Richard Barnette18fd5842018-05-25 18:21:14 +0000281 def _cgpt(self, flag, kernel):
282 """Return numeric cgpt value for the specified flag, kernel, device."""
283 return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
284 kernel['kernel'], flag)).stdout.strip())
Richard Barnette55d1af82018-05-22 23:40:14 +0000285
286
287 def _get_next_kernel(self):
288 """Return the kernel that has priority for the next boot."""
289 priority_a = self._cgpt('-P', _KERNEL_A)
290 priority_b = self._cgpt('-P', _KERNEL_B)
291 if priority_a > priority_b:
292 return _KERNEL_A
293 else:
294 return _KERNEL_B
295
296
297 def _get_kernel_success(self, kernel):
298 """Return boolean success flag for the specified kernel.
299
300 @param kernel: information of the given kernel, either _KERNEL_A
301 or _KERNEL_B.
302 """
303 return self._cgpt('-S', kernel) != 0
304
305
306 def _get_kernel_tries(self, kernel):
307 """Return tries count for the specified kernel.
308
309 @param kernel: information of the given kernel, either _KERNEL_A
310 or _KERNEL_B.
311 """
312 return self._cgpt('-T', kernel)
313
314
Richard Barnette3e8b2282018-05-15 20:42:20 +0000315 def _get_last_update_error(self):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800316 """Get the last autoupdate error code."""
Richard Barnette3e8b2282018-05-15 20:42:20 +0000317 command_result = self._run(
318 '%s --last_attempt_error' % _UPDATER_BIN)
319 return command_result.stdout.strip().replace('\n', ', ')
Shuqian Zhaod9992722016-02-29 12:26:38 -0800320
321
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800322 def _base_update_handler_no_retry(self, run_args):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800323 """Base function to handle a remote update ssh call.
324
325 @param run_args: Dictionary of args passed to ssh_host.run function.
Shuqian Zhaod9992722016-02-29 12:26:38 -0800326
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800327 @throws: intercepts and re-throws all exceptions
Shuqian Zhaod9992722016-02-29 12:26:38 -0800328 """
Shuqian Zhaod9992722016-02-29 12:26:38 -0800329 try:
330 self.host.run(**run_args)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800331 except Exception as e:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800332 logging.debug('exception in update handler: %s', e)
333 raise e
Shuqian Zhaod9992722016-02-29 12:26:38 -0800334
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800335
336 def _base_update_handler(self, run_args, err_msg_prefix=None):
337 """Handle a remote update ssh call, possibly with retries.
338
339 @param run_args: Dictionary of args passed to ssh_host.run function.
340 @param err_msg_prefix: Prefix of the exception error message.
341 """
342 def exception_handler(e):
343 """Examines exceptions and returns True if the update handler
344 should be retried.
345
346 @param e: the exception intercepted by the retry util.
347 """
348 return (isinstance(e, error.AutoservSSHTimeout) or
349 (isinstance(e, error.GenericHostRunError) and
350 hasattr(e, 'description') and
351 (re.search('ERROR_CODE=37', e.description) or
352 re.search('generic error .255.', e.description))))
353
354 try:
355 # Try the update twice (arg 2 is max_retry, not including the first
356 # call). Some exceptions may be caught by the retry handler.
357 retry_util.GenericRetry(exception_handler, 1,
358 self._base_update_handler_no_retry,
359 run_args)
360 except Exception as e:
361 message = err_msg_prefix + ': ' + str(e)
362 raise RootFSUpdateError(message)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800363
364
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800365 def _wait_for_update_service(self):
366 """Ensure that the update engine daemon is running, possibly
367 by waiting for it a bit in case the DUT just rebooted and the
368 service hasn't started yet.
369 """
370 def handler(e):
371 """Retry exception handler.
372
373 Assumes that the error is due to the update service not having
374 started yet.
375
376 @param e: the exception intercepted by the retry util.
377 """
378 if isinstance(e, error.AutoservRunError):
379 logging.debug('update service check exception: %s\n'
380 'retrying...', e)
381 return True
382 else:
383 return False
384
385 # Retry at most three times, every 5s.
386 status = retry_util.GenericRetry(handler, 3,
387 self.check_update_status,
388 sleep=5)
389
390 # Expect the update engine to be idle.
391 if status != UPDATER_IDLE:
Richard Barnette9d43e562018-06-05 17:20:10 +0000392 raise RootFSUpdateError(
393 'Update engine status is %s (%s was expected).'
394 % (status, UPDATER_IDLE))
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800395
396
Richard Barnette55d1af82018-05-22 23:40:14 +0000397 def _reset_update_engine(self):
398 """Resets the host to prepare for a clean update regardless of state."""
399 self._run('stop ui || true')
400 self._run('stop update-engine || true')
401 self._run('start update-engine')
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700402
Richard Barnette55d1af82018-05-22 23:40:14 +0000403 # Wait for update engine to be ready.
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800404 self._wait_for_update_service()
405
Richard Barnette55d1af82018-05-22 23:40:14 +0000406
407 def _reset_stateful_partition(self):
408 """Clear any pending stateful update request."""
Richard Barnette18fd5842018-05-25 18:21:14 +0000409 self._run('%s --stateful_change=reset 2>&1'
410 % self.get_stateful_update_script())
Richard Barnette55d1af82018-05-22 23:40:14 +0000411
412
413 def _revert_boot_partition(self):
414 """Revert the boot partition."""
415 part = self._rootdev('-s')
416 logging.warning('Reverting update; Boot partition will be %s', part)
417 return self._run('/postinst %s 2>&1' % part)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700418
419
Allen Lib5420a72017-06-20 14:14:07 -0700420 def _get_metric_fields(self):
421 """Return a dict of metric fields.
422
423 This is used for sending autoupdate metrics for this instance.
424 """
425 build_name = url_to_image_name(self.update_url)
426 try:
427 board, build_type, milestone, _ = server_utils.ParseBuildName(
428 build_name)
429 except server_utils.ParseBuildNameException:
430 logging.warning('Unable to parse build name %s for metrics. '
431 'Continuing anyway.', build_name)
432 board, build_type, milestone = ('', '', '')
433 return {
434 'dev_server': dev_server.get_hostname(self.update_url),
435 'board': board,
436 'build_type': build_type,
437 'milestone': milestone,
438 }
439
440
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700441 def _verify_update_completed(self):
442 """Verifies that an update has completed.
443
Richard Barnette9d43e562018-06-05 17:20:10 +0000444 @raise RootFSUpdateError if the DUT doesn't indicate that
445 download is complete and the DUT is ready for reboot.
446 @raise RootFSUpdateError if the DUT reports that the partition
447 to be booted next is not the currently inactive
448 partition.
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700449 """
450 status = self.check_update_status()
451 if status != UPDATER_NEED_REBOOT:
Shuqian Zhaod9992722016-02-29 12:26:38 -0800452 error_msg = ''
453 if status == UPDATER_IDLE:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000454 error_msg = 'Update error: %s' % self._get_last_update_error()
Richard Barnette9d43e562018-06-05 17:20:10 +0000455 raise RootFSUpdateError(
456 'Update engine status is %s (%s was expected). %s'
457 % (status, UPDATER_NEED_REBOOT, error_msg))
Richard Barnette4d211c92018-05-24 18:56:08 +0000458 inactive_kernel = self.get_kernel_state()[1]
459 next_kernel = self._get_next_kernel()
460 if next_kernel != inactive_kernel:
Richard Barnette9d43e562018-06-05 17:20:10 +0000461 raise RootFSUpdateError(
Richard Barnette4d211c92018-05-24 18:56:08 +0000462 'Update failed. The kernel for next boot is %s, '
Richard Barnette9d43e562018-06-05 17:20:10 +0000463 'but %s was expected.'
464 % (next_kernel['name'], inactive_kernel['name']))
Richard Barnette4d211c92018-05-24 18:56:08 +0000465 return inactive_kernel
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700466
467
Richard Barnette55d1af82018-05-22 23:40:14 +0000468 def trigger_update(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000469 """Triggers a background update."""
470 # If this function is called immediately after reboot (which it
471 # can be), there is no guarantee that the update engine is up
472 # and running yet, so wait for it.
Richard Barnette55d1af82018-05-22 23:40:14 +0000473 self._wait_for_update_service()
474
475 autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
476 (_UPDATER_BIN, self.update_url))
477 run_args = {'command': autoupdate_cmd}
478 err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
479 logging.info('Triggering update via: %s', autoupdate_cmd)
480 metric_fields = {'success': False}
481 try:
482 self._base_update_handler(run_args, err_prefix)
483 metric_fields['success'] = True
484 finally:
485 c = metrics.Counter('chromeos/autotest/autoupdater/trigger')
486 metric_fields.update(self._get_metric_fields())
487 c.increment(fields=metric_fields)
488
489
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700490 def update_image(self):
Richard Barnette18fd5842018-05-25 18:21:14 +0000491 """Updates the device root FS and kernel and verifies success."""
Shuqian Zhaofe4d62e2016-06-23 14:46:45 -0700492 autoupdate_cmd = ('%s --update --omaha_url=%s' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000493 (_UPDATER_BIN, self.update_url))
David Haddock76a4c882017-12-13 18:50:09 -0800494 if not self.interactive:
495 autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
Shuqian Zhaod9992722016-02-29 12:26:38 -0800496 run_args = {'command': autoupdate_cmd, 'timeout': 3600}
497 err_prefix = ('Failed to install device image using payload at %s '
498 'on %s. ' % (self.update_url, self.host.hostname))
499 logging.info('Updating image via: %s', autoupdate_cmd)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700500 metric_fields = {'success': False}
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800501 try:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800502 self._base_update_handler(run_args, err_prefix)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700503 metric_fields['success'] = True
504 finally:
Allen Li1a5cc0a2017-06-20 14:08:59 -0700505 c = metrics.Counter('chromeos/autotest/autoupdater/update')
Allen Lib5420a72017-06-20 14:14:07 -0700506 metric_fields.update(self._get_metric_fields())
Allen Li1a5cc0a2017-06-20 14:08:59 -0700507 c.increment(fields=metric_fields)
Richard Barnette4d211c92018-05-24 18:56:08 +0000508 return self._verify_update_completed()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700509
510
Chris Sosa5e4246b2012-05-22 18:05:22 -0700511 def get_stateful_update_script(self):
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700512 """Returns the path to the stateful update script on the target.
Chris Sosa5e4246b2012-05-22 18:05:22 -0700513
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700514 When runnning test_that, stateful_update is in chroot /usr/sbin,
515 as installed by chromeos-base/devserver packages.
516 In the lab, it is installed with the python module devserver, by
517 build_externals.py command.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700518
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700519 If we can find it, we hope it exists already on the DUT, we assert
520 otherwise.
Richard Barnette9d43e562018-06-05 17:20:10 +0000521
522 @raise StatefulUpdateError if the script can't be installed.
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700523 """
Richard Barnette3e8b2282018-05-15 20:42:20 +0000524 stateful_update_file = os.path.join(_STATEFUL_UPDATE_PATH,
525 _STATEFUL_UPDATE_SCRIPT)
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700526 if os.path.exists(stateful_update_file):
Chris Sosa5e4246b2012-05-22 18:05:22 -0700527 self.host.send_file(
Richard Barnette3e8b2282018-05-15 20:42:20 +0000528 stateful_update_file, _REMOTE_TMP_STATEFUL_UPDATE,
Gilad Arnold0c0df732015-09-21 06:37:59 -0700529 delete_dest=True)
Richard Barnette3e8b2282018-05-15 20:42:20 +0000530 return _REMOTE_TMP_STATEFUL_UPDATE
Chris Sosa5e4246b2012-05-22 18:05:22 -0700531
Richard Barnette3e8b2282018-05-15 20:42:20 +0000532 if self.host.path_exists(_REMOTE_STATEFUL_UPDATE_PATH):
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700533 logging.warning('Could not chroot %s script, falling back on %s',
Richard Barnette3e8b2282018-05-15 20:42:20 +0000534 _STATEFUL_UPDATE_SCRIPT,
535 _REMOTE_STATEFUL_UPDATE_PATH)
536 return _REMOTE_STATEFUL_UPDATE_PATH
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700537 else:
Richard Barnette9d43e562018-06-05 17:20:10 +0000538 raise StatefulUpdateError('Could not locate %s'
539 % _STATEFUL_UPDATE_SCRIPT)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700540
541
Chris Sosac1932172013-10-16 13:28:53 -0700542 def rollback_rootfs(self, powerwash):
543 """Triggers rollback and waits for it to complete.
544
545 @param powerwash: If true, powerwash as part of rollback.
546
547 @raise RootFSUpdateError if anything went wrong.
Chris Sosac1932172013-10-16 13:28:53 -0700548 """
Dan Shi549fb822015-03-24 18:01:11 -0700549 version = self.host.get_release_version()
Chris Sosac8617522014-06-09 23:22:26 +0000550 # Introduced can_rollback in M36 (build 5772). # etc/lsb-release matches
551 # X.Y.Z. This version split just pulls the first part out.
552 try:
553 build_number = int(version.split('.')[0])
554 except ValueError:
555 logging.error('Could not parse build number.')
556 build_number = 0
557
558 if build_number >= 5772:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000559 can_rollback_cmd = '%s --can_rollback' % _UPDATER_BIN
Chris Sosac8617522014-06-09 23:22:26 +0000560 logging.info('Checking for rollback.')
561 try:
562 self._run(can_rollback_cmd)
563 except error.AutoservRunError as e:
564 raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
565 (self.host.hostname, str(e)))
566
Richard Barnette3e8b2282018-05-15 20:42:20 +0000567 rollback_cmd = '%s --rollback --follow' % _UPDATER_BIN
Chris Sosac1932172013-10-16 13:28:53 -0700568 if not powerwash:
Dan Shif3a35f72016-01-25 11:18:14 -0800569 rollback_cmd += ' --nopowerwash'
Chris Sosac1932172013-10-16 13:28:53 -0700570
Chris Sosac8617522014-06-09 23:22:26 +0000571 logging.info('Performing rollback.')
Chris Sosac1932172013-10-16 13:28:53 -0700572 try:
573 self._run(rollback_cmd)
Chris Sosac1932172013-10-16 13:28:53 -0700574 except error.AutoservRunError as e:
575 raise RootFSUpdateError('Rollback failed on %s: %s' %
576 (self.host.hostname, str(e)))
577
578 self._verify_update_completed()
579
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800580
Chris Sosa72312602013-04-16 15:01:56 -0700581 def update_stateful(self, clobber=True):
582 """Updates the stateful partition.
583
584 @param clobber: If True, a clean stateful installation.
Richard Barnette9d43e562018-06-05 17:20:10 +0000585
586 @raise StatefulUpdateError if the update script fails to
587 complete successfully.
Chris Sosa72312602013-04-16 15:01:56 -0700588 """
Chris Sosa77556d82012-04-05 15:23:14 -0700589 logging.info('Updating stateful partition...')
Richard Barnette18fd5842018-05-25 18:21:14 +0000590 statefuldev_url = self.update_url.replace('update', 'static')
Chris Sosaa3ac2152012-05-23 22:23:13 -0700591
Dale Curtis5c32c722011-05-04 19:24:23 -0700592 # Attempt stateful partition update; this must succeed so that the newly
593 # installed host is testable after update.
Chris Sosa72312602013-04-16 15:01:56 -0700594 statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
595 if clobber:
596 statefuldev_cmd.append('--stateful_change=clean')
597
598 statefuldev_cmd.append('2>&1')
Dale Curtis5c32c722011-05-04 19:24:23 -0700599 try:
Dan Shi205b8732016-01-25 10:56:22 -0800600 self._run(' '.join(statefuldev_cmd), timeout=1200)
Dale Curtis5c32c722011-05-04 19:24:23 -0700601 except error.AutoservRunError:
Richard Barnette18fd5842018-05-25 18:21:14 +0000602 raise StatefulUpdateError(
Gilad Arnold62cf3a42015-10-01 09:15:25 -0700603 'Failed to perform stateful update on %s' %
604 self.host.hostname)
Dale Curtis5c32c722011-05-04 19:24:23 -0700605
Chris Sosaa3ac2152012-05-23 22:23:13 -0700606
Richard Barnette54d14f52018-05-18 16:39:49 +0000607 def verify_boot_expectations(self, expected_kernel, rollback_message):
Richard Barnette55d1af82018-05-22 23:40:14 +0000608 """Verifies that we fully booted given expected kernel state.
609
610 This method both verifies that we booted using the correct kernel
611 state and that the OS has marked the kernel as good.
612
Richard Barnette54d14f52018-05-18 16:39:49 +0000613 @param expected_kernel: kernel that we are verifying with,
Richard Barnette55d1af82018-05-22 23:40:14 +0000614 i.e. I expect to be booted onto partition 4 etc. See output of
615 get_kernel_state.
Richard Barnette9d43e562018-06-05 17:20:10 +0000616 @param rollback_message: string include in except message text
Richard Barnette55d1af82018-05-22 23:40:14 +0000617 if we booted with the wrong partition.
618
Richard Barnette9d43e562018-06-05 17:20:10 +0000619 @raise NewBuildUpdateError if any of the various checks fail.
Richard Barnette55d1af82018-05-22 23:40:14 +0000620 """
621 # Figure out the newly active kernel.
Richard Barnette54d14f52018-05-18 16:39:49 +0000622 active_kernel = self.get_kernel_state()[0]
Richard Barnette55d1af82018-05-22 23:40:14 +0000623
624 # Check for rollback due to a bad build.
Richard Barnette54d14f52018-05-18 16:39:49 +0000625 if active_kernel != expected_kernel:
Richard Barnette55d1af82018-05-22 23:40:14 +0000626
627 # Kernel crash reports should be wiped between test runs, but
628 # may persist from earlier parts of the test, or from problems
629 # with provisioning.
630 #
631 # Kernel crash reports will NOT be present if the crash happened
632 # before encrypted stateful is mounted.
633 #
634 # TODO(dgarrett): Integrate with server/crashcollect.py at some
635 # point.
636 kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
637 if kernel_crashes:
638 rollback_message += ': kernel_crash'
639 logging.debug('Found %d kernel crash reports:',
640 len(kernel_crashes))
641 # The crash names contain timestamps that may be useful:
642 # kernel.20131207.005945.0.kcrash
643 for crash in kernel_crashes:
644 logging.debug(' %s', os.path.basename(crash))
645
646 # Print out some information to make it easier to debug
647 # the rollback.
648 logging.debug('Dumping partition table.')
649 self._run('cgpt show $(rootdev -s -d)')
650 logging.debug('Dumping crossystem for firmware debugging.')
651 self._run('crossystem --all')
Richard Barnette9d43e562018-06-05 17:20:10 +0000652 raise NewBuildUpdateError(self.update_version, rollback_message)
Richard Barnette55d1af82018-05-22 23:40:14 +0000653
654 # Make sure chromeos-setgoodkernel runs.
655 try:
656 utils.poll_for_condition(
Richard Barnette54d14f52018-05-18 16:39:49 +0000657 lambda: (self._get_kernel_tries(active_kernel) == 0
658 and self._get_kernel_success(active_kernel)),
Richard Barnette9d43e562018-06-05 17:20:10 +0000659 exception=RootFSUpdateError(),
Richard Barnette55d1af82018-05-22 23:40:14 +0000660 timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
Richard Barnette9d43e562018-06-05 17:20:10 +0000661 except RootFSUpdateError:
Richard Barnette55d1af82018-05-22 23:40:14 +0000662 services_status = self._run('status system-services').stdout
663 if services_status != 'system-services start/running\n':
664 event = ('Chrome failed to reach login screen')
665 else:
666 event = ('update-engine failed to call '
667 'chromeos-setgoodkernel')
Richard Barnette9d43e562018-06-05 17:20:10 +0000668 raise NewBuildUpdateError(self.update_version, event)
Richard Barnette55d1af82018-05-22 23:40:14 +0000669
670
Richard Barnette14ee84c2018-05-18 20:23:42 +0000671 def _prepare_host(self):
672 """Make sure the target DUT is working and ready for update.
673
674 Initially, the target DUT's state is unknown. The DUT is
675 expected to be online, but we strive to be forgiving if Chrome
676 and/or the update engine aren't fully functional.
677 """
678 # Summary of work, and the rationale:
679 # 1. Reboot, because it's a good way to clear out problems.
680 # 2. Touch the PROVISION_FAILED file, to allow repair to detect
681 # failure later.
682 # 3. Run the hook for host class specific preparation.
683 # 4. Stop Chrome, because the system is designed to eventually
684 # reboot if Chrome is stuck in a crash loop.
685 # 5. Force `update-engine` to start, because if Chrome failed
686 # to start properly, the status of the `update-engine` job
687 # will be uncertain.
688 self._reset_stateful_partition()
689 self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
690 self._run('touch %s' % PROVISION_FAILED)
691 self.host.prepare_for_update()
692 self._run('stop ui || true')
693 self._run('start update-engine || true')
694 self._wait_for_update_service()
695 logging.info('Updating from version %s to %s.',
696 self.host.get_release_version(),
697 self.update_version)
698
699
700 def _verify_devserver(self):
Richard Barnette9d43e562018-06-05 17:20:10 +0000701 """Check that our chosen devserver is still working.
702
703 @raise DevServerError if the devserver fails any sanity check.
704 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000705 server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
706 try:
707 if not dev_server.ImageServer.devserver_healthy(server):
Richard Barnette9d43e562018-06-05 17:20:10 +0000708 raise DevServerError(
709 server, 'Devserver is not healthy')
Richard Barnette14ee84c2018-05-18 20:23:42 +0000710 except Exception as e:
Richard Barnette9d43e562018-06-05 17:20:10 +0000711 raise DevServerError(
712 server, 'Devserver is not up and available')
Richard Barnette14ee84c2018-05-18 20:23:42 +0000713
714
Richard Barnette54d14f52018-05-18 16:39:49 +0000715 def _install_update(self):
Richard Barnette0beb14b2018-05-15 18:07:52 +0000716 """Install the requested image on the DUT, but don't start it.
717
718 This downloads all content needed for the requested update, and
719 installs it in place on the DUT. This does not reboot the DUT,
Richard Barnette14ee84c2018-05-18 20:23:42 +0000720 so the update is merely pending when the method returns.
Dan Shi0f466e82013-02-22 15:44:58 -0800721 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000722 logging.info('Installing image at %s onto %s',
723 self.update_url, self.host.hostname)
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200724 try:
Richard Barnette14ee84c2018-05-18 20:23:42 +0000725 expected_kernel = self.update_image()
726 self.update_stateful()
Dale Curtis1e973182011-07-12 18:21:36 -0700727 logging.info('Update complete.')
Dale Curtis1e973182011-07-12 18:21:36 -0700728 except:
Richard Barnette14ee84c2018-05-18 20:23:42 +0000729 self._revert_boot_partition()
730 self._reset_stateful_partition()
Dale Curtis1e973182011-07-12 18:21:36 -0700731 # Collect update engine logs in the event of failure.
732 if self.host.job:
Aviv Keshet2610d3e2016-06-01 16:37:01 -0700733 logging.info('Collecting update engine logs due to failure...')
Dale Curtis1e973182011-07-12 18:21:36 -0700734 self.host.get_file(
Richard Barnette3e8b2282018-05-15 20:42:20 +0000735 _UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
Gilad Arnold0c0df732015-09-21 06:37:59 -0700736 preserve_perm=False)
Richard Barnette3e8b2282018-05-15 20:42:20 +0000737 _list_image_dir_contents(self.update_url)
Dale Curtis1e973182011-07-12 18:21:36 -0700738 raise
Richard Barnette4d211c92018-05-24 18:56:08 +0000739 return expected_kernel
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200740
741
Richard Barnette14ee84c2018-05-18 20:23:42 +0000742 def _complete_update(self, expected_kernel):
743 """Finish the update, and confirm that it succeeded.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000744
Richard Barnette14ee84c2018-05-18 20:23:42 +0000745 Initial condition is that the target build has been downloaded
746 and installed on the DUT, but has not yet been booted. This
747 function is responsible for rebooting the DUT, and checking that
748 the new build is running successfully.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000749
Richard Barnette14ee84c2018-05-18 20:23:42 +0000750 @param expected_kernel: kernel expected to be active after reboot.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000751 """
Richard Barnette14ee84c2018-05-18 20:23:42 +0000752 # Regarding the 'crossystem' command below: In some cases,
753 # the update flow puts the TPM into a state such that it
754 # fails verification. We don't know why. However, this
755 # call papers over the problem by clearing the TPM during
756 # the reboot.
757 #
758 # We ignore failures from 'crossystem'. Although failure
759 # here is unexpected, and could signal a bug, the point of
760 # the exercise is to paper over problems; allowing this to
761 # fail would defeat the purpose.
762 self._run('crossystem clear_tpm_owner_request=1',
763 ignore_status=True)
764 self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
765
Richard Barnette0beb14b2018-05-15 18:07:52 +0000766 # Touch the lab machine file to leave a marker that
767 # distinguishes this image from other test images.
768 # Afterwards, we must re-run the autoreboot script because
769 # it depends on the _LAB_MACHINE_FILE.
770 autoreboot_cmd = ('FILE="%s" ; [ -f "$FILE" ] || '
771 '( touch "$FILE" ; start autoreboot )')
Richard Barnette3e8b2282018-05-15 20:42:20 +0000772 self._run(autoreboot_cmd % _LAB_MACHINE_FILE)
Richard Barnette0beb14b2018-05-15 18:07:52 +0000773 self.verify_boot_expectations(
774 expected_kernel, rollback_message=
775 'Build %s failed to boot on %s; system rolled back to previous '
776 'build' % (self.update_version, self.host.hostname))
777
778 logging.debug('Cleaning up old autotest directories.')
779 try:
780 installed_autodir = autotest.Autotest.get_installed_autodir(
781 self.host)
782 self._run('rm -rf ' + installed_autodir)
783 except autotest.AutodirNotFoundError:
784 logging.debug('No autotest installed directory found.')
785
786
Richard Barnette54d14f52018-05-18 16:39:49 +0000787 def run_update(self):
Richard Barnette0beb14b2018-05-15 18:07:52 +0000788 """Perform a full update of a DUT in the test lab.
789
790 This downloads and installs the root FS and stateful partition
791 content needed for the update specified in `self.host` and
792 `self.update_url`. The update is performed according to the
793 requirements for provisioning a DUT for testing the requested
794 build.
795
Richard Barnette0beb14b2018-05-15 18:07:52 +0000796 @returns A tuple of the form `(image_name, attributes)`, where
797 `image_name` is the name of the image installed, and
798 `attributes` is new attributes to be applied to the DUT.
Richard Barnette9d43e562018-06-05 17:20:10 +0000799 @raise HostUpdateError if failure is caused by a problem on
800 the DUT prior to the update.
801 @raise ImageInstallError if the failure occurs during download
802 and install of the update and cannot be definitively
803 blamed on either the DUT or the devserver.
804 @raise NewBuildUpdateError if the failure occurs because the
805 new build fails to function correctly.
Richard Barnette0beb14b2018-05-15 18:07:52 +0000806 """
Richard Barnette0beb14b2018-05-15 18:07:52 +0000807 server_name = dev_server.get_hostname(self.update_url)
808 (metrics.Counter('chromeos/autotest/provision/install')
809 .increment(fields={'devserver': server_name}))
810
Richard Barnette14ee84c2018-05-18 20:23:42 +0000811 self._verify_devserver()
Richard Barnette9d43e562018-06-05 17:20:10 +0000812
813 try:
814 self._prepare_host()
815 except _AttributedUpdateError:
816 raise
817 except Exception as e:
818 logging.exception('Failure preparing host prior to update.')
819 raise HostUpdateError(self.host.hostname, str(e))
820
821 try:
822 expected_kernel = self._install_update()
823 except _AttributedUpdateError:
824 raise
825 except Exception as e:
826 logging.exception('Failure during download and install.')
827 raise ImageInstallError(self.host.hostname, server_name, str(e))
828
829 try:
830 self._complete_update(expected_kernel)
831 except _AttributedUpdateError:
832 raise
833 except Exception as e:
834 logging.exception('Failure from build after update.')
835 raise NewBuildUpdateError(self.update_version, str(e))
Richard Barnette0beb14b2018-05-15 18:07:52 +0000836
Richard Barnette0beb14b2018-05-15 18:07:52 +0000837 image_name = url_to_image_name(self.update_url)
838 # update_url is different from devserver url needed to stage autotest
839 # packages, therefore, resolve a new devserver url here.
840 devserver_url = dev_server.ImageServer.resolve(
841 image_name, self.host.hostname).url()
842 repo_url = tools.get_package_url(devserver_url, image_name)
843 return image_name, {ds_constants.JOB_REPO_URL: repo_url}