blob: 300a69515c7c14dc4c87ac28cb549a29e0ef9d96 [file] [log] [blame]
Chris Sosa5e4246b2012-05-22 18:05:22 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Sean O'Connor5346e4e2010-08-12 18:49:24 +02002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Don Garrett56b1cc82013-12-06 17:49:20 -08005import glob
Sean O'Connor5346e4e2010-08-12 18:49:24 +02006import logging
Dale Curtis5c32c722011-05-04 19:24:23 -07007import os
Sean O'Connor5346e4e2010-08-12 18:49:24 +02008import re
Prashanth B32baa9b2014-03-13 13:23:01 -07009import urllib2
Richard Barnette0beb14b2018-05-15 18:07:52 +000010import urlparse
Sean O'Connor5346e4e2010-08-12 18:49:24 +020011
Chris Sosa65425082013-10-16 13:26:22 -070012from autotest_lib.client.bin import utils
Dale Curtis5c32c722011-05-04 19:24:23 -070013from autotest_lib.client.common_lib import error, global_config
Prashanth B32baa9b2014-03-13 13:23:01 -070014from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette0beb14b2018-05-15 18:07:52 +000015from autotest_lib.server import autotest
Shelley Chen61d28982016-10-28 09:40:20 -070016from autotest_lib.server import utils as server_utils
Richard Barnette0beb14b2018-05-15 18:07:52 +000017from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
18from autotest_lib.server.cros.dynamic_suite import tools
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -080019from chromite.lib import retry_util
Dan Shif3a35f72016-01-25 11:18:14 -080020
Shelley Chen16b8df32016-10-27 16:24:21 -070021try:
22 from chromite.lib import metrics
Dan Shi5e2efb72017-02-07 11:40:23 -080023except ImportError:
24 metrics = utils.metrics_mock
Sean O'Connor5346e4e2010-08-12 18:49:24 +020025
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070026try:
27 import devserver
Richard Barnette3e8b2282018-05-15 20:42:20 +000028 _STATEFUL_UPDATE_PATH = devserver.__path__[0]
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070029except ImportError:
Richard Barnette3e8b2282018-05-15 20:42:20 +000030 _STATEFUL_UPDATE_PATH = '/usr/bin'
Gwendal Grignou3e96cc22017-06-07 16:22:51 -070031
# Update engine states, as reported on the CURRENT_OP line of
# `update_engine_client -status` (see ChromiumOSUpdater.check_update_status).
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


# Name of the stateful update script, and the two locations on the DUT
# where it may be found:  the pre-installed copy, and the temporary copy
# that is pushed from the server when a local copy is available.
_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
_REMOTE_STATEFUL_UPDATE_PATH = os.path.join(
        '/usr/local/bin', _STATEFUL_UPDATE_SCRIPT)
_REMOTE_TMP_STATEFUL_UPDATE = os.path.join(
        '/tmp', _STATEFUL_UPDATE_SCRIPT)

# The update engine client binary on the DUT, and the DUT log files
# collected when an update fails.
_UPDATER_BIN = '/usr/bin/update_engine_client'
_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

# The two kernel/rootfs slots.  'kernel' and 'root' are the partition
# numbers of the slot's kernel and root filesystem, respectively.
_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}

# Time to wait for new kernel to be marked successful after
# auto update.
_KERNEL_UPDATE_TIMEOUT = 120


# PROVISION_FAILED - A flag file to indicate provision failures.  The
# file is created at the start of any AU procedure (see
# `ChromiumOSUpdater.run_full_update()`).  The file's location in
# stateful means that on successful update it will be removed.  Thus, if
# this file exists, it indicates that we've tried and failed in a
# previous attempt to update.
PROVISION_FAILED = '/var/tmp/provision_failed'


# A flag file used to enable special handling in lab DUTs.  Some
# parts of the system in Chromium OS test images will behave in ways
# convenient to the test lab when this file is present.  Generally,
# we create this immediately after any update completes.
_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
73
74
class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions.

    This is the base class of the more specific update errors defined
    in this module (RootFSUpdateError and StatefulUpdateError).
    """
Gilad Arnoldd6adeb82015-09-21 07:10:03 -070077
78
class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update.

    Also raised by this module when triggering an update or performing
    a rollback through the update engine client fails.
    """
Chris Sosa77556d82012-04-05 15:23:14 -070081
82
class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update.

    See ChromiumOSUpdater.update_stateful(), which raises this when the
    stateful update script exits with an error.
    """
Chris Sosa77556d82012-04-05 15:23:14 -070085
86
Richard Barnette3e8b2282018-05-15 20:42:20 +000087def _url_to_version(update_url):
Dan Shi0f466e82013-02-22 15:44:58 -080088 """Return the version based on update_url.
89
90 @param update_url: url to the image to update to.
91
92 """
Dale Curtisddfdb942011-07-14 13:59:24 -070093 # The Chrome OS version is generally the last element in the URL. The only
94 # exception is delta update URLs, which are rooted under the version; e.g.,
95 # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
96 # strip off the au section of the path before reading the version.
Dan Shi5002cfc2013-04-29 10:45:05 -070097 return re.sub('/au/.*', '',
98 urlparse.urlparse(update_url).path).split('/')[-1].strip()
Sean O'Connor5346e4e2010-08-12 18:49:24 +020099
100
def url_to_image_name(update_url):
    """Return the image name embedded in an update URL.

    The image name is the last two components of the URL path.  For
    example, given
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    the result is
        lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    path_components = urlparse.urlparse(update_url).path.split('/')
    return '/'.join(path_components[-2:])
113
114
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The url is matched against the `image_url_pattern` setting in the
    CROS section of the global config, with each '%s' placeholder in
    the pattern treated as a run of non-whitespace characters.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple of the groups captured from the update url; for
        the expected two-placeholder pattern this is
        (devserver url, build).  This function never returns None;
        failures are reported by raising ValueError.

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
                image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    # Raw string:  '\S' is a regex class, not a string escape sequence.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
138
139
def _list_image_dir_contents(update_url):
    """Ask the devserver to list the image directory for an update url.

    Failures to parse the url or to reach the devserver are logged as
    warnings and otherwise ignored.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return
    warning_prefix = ('Cannot check contents of devserver, update url %s'
                      % update_url)
    try:
        server_url, build_name = _get_devserver_build_from_update_url(
                update_url)
    except ValueError as parse_failure:
        logging.warning('%s: %s', warning_prefix, parse_failure)
        return
    image_server = dev_server.ImageServer(server_url)
    # The devserver will retry on URLError to avoid flaky connections,
    # but will eventually raise the URLError if it persists.  All
    # HTTPErrors get converted to DevServerExceptions.
    try:
        image_server.list_image_dir(build_name)
    except (dev_server.DevServerException, urllib2.URLError) as list_failure:
        logging.warning('%s: %s', warning_prefix, list_failure)
161 logging.warning('%s: %s', error_msg, e)
162
163
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700164# TODO(garnold) This implements shared updater functionality needed for
165# supporting the autoupdate_EndToEnd server-side test. We should probably
166# migrate more of the existing ChromiumOSUpdater functionality to it as we
167# expand non-CrOS support in other tests.
Richard Barnette3e8b2282018-05-15 20:42:20 +0000168class ChromiumOSUpdater(object):
169 """Chromium OS specific DUT update functionality."""
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700170
Richard Barnette3e8b2282018-05-15 20:42:20 +0000171 def __init__(self, update_url, host=None, interactive=True):
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700172 """Initializes the object.
173
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700174 @param update_url: The URL we want the update to use.
175 @param host: A client.common_lib.hosts.Host implementation.
David Haddock76a4c882017-12-13 18:50:09 -0800176 @param interactive: Bool whether we are doing an interactive update.
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700177 """
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700178 self.update_url = update_url
179 self.host = host
David Haddock76a4c882017-12-13 18:50:09 -0800180 self.interactive = interactive
Richard Barnette3e8b2282018-05-15 20:42:20 +0000181 self.update_version = _url_to_version(update_url)
182
183
184 def _run(self, cmd, *args, **kwargs):
185 """Abbreviated form of self.host.run(...)"""
186 return self.host.run(cmd, *args, **kwargs)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700187
188
189 def check_update_status(self):
190 """Returns the current update engine state.
191
192 We use the `update_engine_client -status' command and parse the line
193 indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".
194 """
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800195 update_status = self.host.run(command='%s -status | grep CURRENT_OP' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000196 _UPDATER_BIN)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700197 return update_status.stdout.strip().split('=')[-1]
198
199
Richard Barnette55d1af82018-05-22 23:40:14 +0000200 def _rootdev(self, options=''):
201 """Returns the stripped output of rootdev <options>.
202
203 @param options: options to run rootdev.
204
205 """
206 return self._run('rootdev %s' % options).stdout.strip()
207
208
209 def get_kernel_state(self):
210 """Returns the (<active>, <inactive>) kernel state as a pair."""
211 active_root = int(re.findall('\d+\Z', self._rootdev('-s'))[0])
212 if active_root == _KERNEL_A['root']:
213 return _KERNEL_A, _KERNEL_B
214 elif active_root == _KERNEL_B['root']:
215 return _KERNEL_B, _KERNEL_A
216 else:
217 raise ChromiumOSError('Encountered unknown root partition: %s' %
218 active_root)
219
220
Richard Barnette18fd5842018-05-25 18:21:14 +0000221 def _cgpt(self, flag, kernel):
222 """Return numeric cgpt value for the specified flag, kernel, device."""
223 return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
224 kernel['kernel'], flag)).stdout.strip())
Richard Barnette55d1af82018-05-22 23:40:14 +0000225
226
227 def _get_next_kernel(self):
228 """Return the kernel that has priority for the next boot."""
229 priority_a = self._cgpt('-P', _KERNEL_A)
230 priority_b = self._cgpt('-P', _KERNEL_B)
231 if priority_a > priority_b:
232 return _KERNEL_A
233 else:
234 return _KERNEL_B
235
236
237 def _get_kernel_success(self, kernel):
238 """Return boolean success flag for the specified kernel.
239
240 @param kernel: information of the given kernel, either _KERNEL_A
241 or _KERNEL_B.
242 """
243 return self._cgpt('-S', kernel) != 0
244
245
246 def _get_kernel_tries(self, kernel):
247 """Return tries count for the specified kernel.
248
249 @param kernel: information of the given kernel, either _KERNEL_A
250 or _KERNEL_B.
251 """
252 return self._cgpt('-T', kernel)
253
254
Richard Barnette3e8b2282018-05-15 20:42:20 +0000255 def _get_last_update_error(self):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800256 """Get the last autoupdate error code."""
Richard Barnette3e8b2282018-05-15 20:42:20 +0000257 command_result = self._run(
258 '%s --last_attempt_error' % _UPDATER_BIN)
259 return command_result.stdout.strip().replace('\n', ', ')
Shuqian Zhaod9992722016-02-29 12:26:38 -0800260
261
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800262 def _base_update_handler_no_retry(self, run_args):
Shuqian Zhaod9992722016-02-29 12:26:38 -0800263 """Base function to handle a remote update ssh call.
264
265 @param run_args: Dictionary of args passed to ssh_host.run function.
Shuqian Zhaod9992722016-02-29 12:26:38 -0800266
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800267 @throws: intercepts and re-throws all exceptions
Shuqian Zhaod9992722016-02-29 12:26:38 -0800268 """
Shuqian Zhaod9992722016-02-29 12:26:38 -0800269 try:
270 self.host.run(**run_args)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800271 except Exception as e:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800272 logging.debug('exception in update handler: %s', e)
273 raise e
Shuqian Zhaod9992722016-02-29 12:26:38 -0800274
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800275
276 def _base_update_handler(self, run_args, err_msg_prefix=None):
277 """Handle a remote update ssh call, possibly with retries.
278
279 @param run_args: Dictionary of args passed to ssh_host.run function.
280 @param err_msg_prefix: Prefix of the exception error message.
281 """
282 def exception_handler(e):
283 """Examines exceptions and returns True if the update handler
284 should be retried.
285
286 @param e: the exception intercepted by the retry util.
287 """
288 return (isinstance(e, error.AutoservSSHTimeout) or
289 (isinstance(e, error.GenericHostRunError) and
290 hasattr(e, 'description') and
291 (re.search('ERROR_CODE=37', e.description) or
292 re.search('generic error .255.', e.description))))
293
294 try:
295 # Try the update twice (arg 2 is max_retry, not including the first
296 # call). Some exceptions may be caught by the retry handler.
297 retry_util.GenericRetry(exception_handler, 1,
298 self._base_update_handler_no_retry,
299 run_args)
300 except Exception as e:
301 message = err_msg_prefix + ': ' + str(e)
302 raise RootFSUpdateError(message)
Shuqian Zhaod9992722016-02-29 12:26:38 -0800303
304
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800305 def _wait_for_update_service(self):
306 """Ensure that the update engine daemon is running, possibly
307 by waiting for it a bit in case the DUT just rebooted and the
308 service hasn't started yet.
309 """
310 def handler(e):
311 """Retry exception handler.
312
313 Assumes that the error is due to the update service not having
314 started yet.
315
316 @param e: the exception intercepted by the retry util.
317 """
318 if isinstance(e, error.AutoservRunError):
319 logging.debug('update service check exception: %s\n'
320 'retrying...', e)
321 return True
322 else:
323 return False
324
325 # Retry at most three times, every 5s.
326 status = retry_util.GenericRetry(handler, 3,
327 self.check_update_status,
328 sleep=5)
329
330 # Expect the update engine to be idle.
331 if status != UPDATER_IDLE:
332 raise ChromiumOSError('%s is not in an installable state' %
333 self.host.hostname)
334
335
Richard Barnette55d1af82018-05-22 23:40:14 +0000336 def _reset_update_engine(self):
337 """Resets the host to prepare for a clean update regardless of state."""
338 self._run('stop ui || true')
339 self._run('stop update-engine || true')
340 self._run('start update-engine')
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700341
Richard Barnette55d1af82018-05-22 23:40:14 +0000342 # Wait for update engine to be ready.
Luigi Semenzatof15c8fc2017-03-03 14:12:40 -0800343 self._wait_for_update_service()
344
Richard Barnette55d1af82018-05-22 23:40:14 +0000345
346 def _reset_stateful_partition(self):
347 """Clear any pending stateful update request."""
Richard Barnette18fd5842018-05-25 18:21:14 +0000348 self._run('%s --stateful_change=reset 2>&1'
349 % self.get_stateful_update_script())
Richard Barnette55d1af82018-05-22 23:40:14 +0000350
351
352 def _revert_boot_partition(self):
353 """Revert the boot partition."""
354 part = self._rootdev('-s')
355 logging.warning('Reverting update; Boot partition will be %s', part)
356 return self._run('/postinst %s 2>&1' % part)
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700357
358
Allen Lib5420a72017-06-20 14:14:07 -0700359 def _get_metric_fields(self):
360 """Return a dict of metric fields.
361
362 This is used for sending autoupdate metrics for this instance.
363 """
364 build_name = url_to_image_name(self.update_url)
365 try:
366 board, build_type, milestone, _ = server_utils.ParseBuildName(
367 build_name)
368 except server_utils.ParseBuildNameException:
369 logging.warning('Unable to parse build name %s for metrics. '
370 'Continuing anyway.', build_name)
371 board, build_type, milestone = ('', '', '')
372 return {
373 'dev_server': dev_server.get_hostname(self.update_url),
374 'board': board,
375 'build_type': build_type,
376 'milestone': milestone,
377 }
378
379
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700380 def _verify_update_completed(self):
381 """Verifies that an update has completed.
382
383 @raise RootFSUpdateError: if verification fails.
384 """
385 status = self.check_update_status()
386 if status != UPDATER_NEED_REBOOT:
Shuqian Zhaod9992722016-02-29 12:26:38 -0800387 error_msg = ''
388 if status == UPDATER_IDLE:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000389 error_msg = 'Update error: %s' % self._get_last_update_error()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700390 raise RootFSUpdateError('Update did not complete with correct '
Shuqian Zhaod9992722016-02-29 12:26:38 -0800391 'status. Expecting %s, actual %s. %s' %
392 (UPDATER_NEED_REBOOT, status, error_msg))
Richard Barnette4d211c92018-05-24 18:56:08 +0000393 inactive_kernel = self.get_kernel_state()[1]
394 next_kernel = self._get_next_kernel()
395 if next_kernel != inactive_kernel:
396 raise ChromiumOSError(
397 'Update failed. The kernel for next boot is %s, '
398 'but %s was expected.' %
399 (next_kernel['name'], inactive_kernel['name']))
400 return inactive_kernel
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700401
402
Richard Barnette55d1af82018-05-22 23:40:14 +0000403 def trigger_update(self):
404 """Triggers a background update.
405
406 @raise RootFSUpdateError or unknown Exception if anything went wrong.
407 """
408 # If this function is called immediately after reboot (which it is at
409 # this time), there is no guarantee that the update service is up and
410 # running yet, so wait for it.
411 self._wait_for_update_service()
412
413 autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
414 (_UPDATER_BIN, self.update_url))
415 run_args = {'command': autoupdate_cmd}
416 err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
417 logging.info('Triggering update via: %s', autoupdate_cmd)
418 metric_fields = {'success': False}
419 try:
420 self._base_update_handler(run_args, err_prefix)
421 metric_fields['success'] = True
422 finally:
423 c = metrics.Counter('chromeos/autotest/autoupdater/trigger')
424 metric_fields.update(self._get_metric_fields())
425 c.increment(fields=metric_fields)
426
427
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700428 def update_image(self):
Richard Barnette18fd5842018-05-25 18:21:14 +0000429 """Updates the device root FS and kernel and verifies success."""
Shuqian Zhaofe4d62e2016-06-23 14:46:45 -0700430 autoupdate_cmd = ('%s --update --omaha_url=%s' %
Richard Barnette3e8b2282018-05-15 20:42:20 +0000431 (_UPDATER_BIN, self.update_url))
David Haddock76a4c882017-12-13 18:50:09 -0800432 if not self.interactive:
433 autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
Shuqian Zhaod9992722016-02-29 12:26:38 -0800434 run_args = {'command': autoupdate_cmd, 'timeout': 3600}
435 err_prefix = ('Failed to install device image using payload at %s '
436 'on %s. ' % (self.update_url, self.host.hostname))
437 logging.info('Updating image via: %s', autoupdate_cmd)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700438 metric_fields = {'success': False}
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800439 try:
Luigi Semenzatoe76d9f82016-11-21 11:15:10 -0800440 self._base_update_handler(run_args, err_prefix)
Allen Li1a5cc0a2017-06-20 14:08:59 -0700441 metric_fields['success'] = True
442 finally:
Allen Li1a5cc0a2017-06-20 14:08:59 -0700443 c = metrics.Counter('chromeos/autotest/autoupdater/update')
Allen Lib5420a72017-06-20 14:14:07 -0700444 metric_fields.update(self._get_metric_fields())
Allen Li1a5cc0a2017-06-20 14:08:59 -0700445 c.increment(fields=metric_fields)
Richard Barnette4d211c92018-05-24 18:56:08 +0000446 return self._verify_update_completed()
Gilad Arnoldd6adeb82015-09-21 07:10:03 -0700447
448
Chris Sosa5e4246b2012-05-22 18:05:22 -0700449 def get_stateful_update_script(self):
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700450 """Returns the path to the stateful update script on the target.
Chris Sosa5e4246b2012-05-22 18:05:22 -0700451
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700452 When runnning test_that, stateful_update is in chroot /usr/sbin,
453 as installed by chromeos-base/devserver packages.
454 In the lab, it is installed with the python module devserver, by
455 build_externals.py command.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700456
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700457 If we can find it, we hope it exists already on the DUT, we assert
458 otherwise.
459 """
Richard Barnette3e8b2282018-05-15 20:42:20 +0000460 stateful_update_file = os.path.join(_STATEFUL_UPDATE_PATH,
461 _STATEFUL_UPDATE_SCRIPT)
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700462 if os.path.exists(stateful_update_file):
Chris Sosa5e4246b2012-05-22 18:05:22 -0700463 self.host.send_file(
Richard Barnette3e8b2282018-05-15 20:42:20 +0000464 stateful_update_file, _REMOTE_TMP_STATEFUL_UPDATE,
Gilad Arnold0c0df732015-09-21 06:37:59 -0700465 delete_dest=True)
Richard Barnette3e8b2282018-05-15 20:42:20 +0000466 return _REMOTE_TMP_STATEFUL_UPDATE
Chris Sosa5e4246b2012-05-22 18:05:22 -0700467
Richard Barnette3e8b2282018-05-15 20:42:20 +0000468 if self.host.path_exists(_REMOTE_STATEFUL_UPDATE_PATH):
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700469 logging.warning('Could not chroot %s script, falling back on %s',
Richard Barnette3e8b2282018-05-15 20:42:20 +0000470 _STATEFUL_UPDATE_SCRIPT,
471 _REMOTE_STATEFUL_UPDATE_PATH)
472 return _REMOTE_STATEFUL_UPDATE_PATH
Gwendal Grignou3e96cc22017-06-07 16:22:51 -0700473 else:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000474 raise ChromiumOSError('Could not locate %s' %
475 _STATEFUL_UPDATE_SCRIPT)
Chris Sosa5e4246b2012-05-22 18:05:22 -0700476
477
Chris Sosac1932172013-10-16 13:28:53 -0700478 def rollback_rootfs(self, powerwash):
479 """Triggers rollback and waits for it to complete.
480
481 @param powerwash: If true, powerwash as part of rollback.
482
483 @raise RootFSUpdateError if anything went wrong.
484
485 """
Dan Shi549fb822015-03-24 18:01:11 -0700486 version = self.host.get_release_version()
Chris Sosac8617522014-06-09 23:22:26 +0000487 # Introduced can_rollback in M36 (build 5772). # etc/lsb-release matches
488 # X.Y.Z. This version split just pulls the first part out.
489 try:
490 build_number = int(version.split('.')[0])
491 except ValueError:
492 logging.error('Could not parse build number.')
493 build_number = 0
494
495 if build_number >= 5772:
Richard Barnette3e8b2282018-05-15 20:42:20 +0000496 can_rollback_cmd = '%s --can_rollback' % _UPDATER_BIN
Chris Sosac8617522014-06-09 23:22:26 +0000497 logging.info('Checking for rollback.')
498 try:
499 self._run(can_rollback_cmd)
500 except error.AutoservRunError as e:
501 raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
502 (self.host.hostname, str(e)))
503
Richard Barnette3e8b2282018-05-15 20:42:20 +0000504 rollback_cmd = '%s --rollback --follow' % _UPDATER_BIN
Chris Sosac1932172013-10-16 13:28:53 -0700505 if not powerwash:
Dan Shif3a35f72016-01-25 11:18:14 -0800506 rollback_cmd += ' --nopowerwash'
Chris Sosac1932172013-10-16 13:28:53 -0700507
Chris Sosac8617522014-06-09 23:22:26 +0000508 logging.info('Performing rollback.')
Chris Sosac1932172013-10-16 13:28:53 -0700509 try:
510 self._run(rollback_cmd)
Chris Sosac1932172013-10-16 13:28:53 -0700511 except error.AutoservRunError as e:
512 raise RootFSUpdateError('Rollback failed on %s: %s' %
513 (self.host.hostname, str(e)))
514
515 self._verify_update_completed()
516
Gilad Arnold0ed760c2012-11-05 23:42:53 -0800517
Chris Sosa72312602013-04-16 15:01:56 -0700518 def update_stateful(self, clobber=True):
519 """Updates the stateful partition.
520
521 @param clobber: If True, a clean stateful installation.
522 """
Chris Sosa77556d82012-04-05 15:23:14 -0700523 logging.info('Updating stateful partition...')
Richard Barnette18fd5842018-05-25 18:21:14 +0000524 statefuldev_url = self.update_url.replace('update', 'static')
Chris Sosaa3ac2152012-05-23 22:23:13 -0700525
Dale Curtis5c32c722011-05-04 19:24:23 -0700526 # Attempt stateful partition update; this must succeed so that the newly
527 # installed host is testable after update.
Chris Sosa72312602013-04-16 15:01:56 -0700528 statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
529 if clobber:
530 statefuldev_cmd.append('--stateful_change=clean')
531
532 statefuldev_cmd.append('2>&1')
Dale Curtis5c32c722011-05-04 19:24:23 -0700533 try:
Dan Shi205b8732016-01-25 10:56:22 -0800534 self._run(' '.join(statefuldev_cmd), timeout=1200)
Dale Curtis5c32c722011-05-04 19:24:23 -0700535 except error.AutoservRunError:
Richard Barnette18fd5842018-05-25 18:21:14 +0000536 raise StatefulUpdateError(
Gilad Arnold62cf3a42015-10-01 09:15:25 -0700537 'Failed to perform stateful update on %s' %
538 self.host.hostname)
Dale Curtis5c32c722011-05-04 19:24:23 -0700539
Chris Sosaa3ac2152012-05-23 22:23:13 -0700540
Richard Barnette54d14f52018-05-18 16:39:49 +0000541 def verify_boot_expectations(self, expected_kernel, rollback_message):
Richard Barnette55d1af82018-05-22 23:40:14 +0000542 """Verifies that we fully booted given expected kernel state.
543
544 This method both verifies that we booted using the correct kernel
545 state and that the OS has marked the kernel as good.
546
Richard Barnette54d14f52018-05-18 16:39:49 +0000547 @param expected_kernel: kernel that we are verifying with,
Richard Barnette55d1af82018-05-22 23:40:14 +0000548 i.e. I expect to be booted onto partition 4 etc. See output of
549 get_kernel_state.
550 @param rollback_message: string to raise as a ChromiumOSError
551 if we booted with the wrong partition.
552
553 @raises ChromiumOSError: If we didn't.
554 """
555 # Figure out the newly active kernel.
Richard Barnette54d14f52018-05-18 16:39:49 +0000556 active_kernel = self.get_kernel_state()[0]
Richard Barnette55d1af82018-05-22 23:40:14 +0000557
558 # Check for rollback due to a bad build.
Richard Barnette54d14f52018-05-18 16:39:49 +0000559 if active_kernel != expected_kernel:
Richard Barnette55d1af82018-05-22 23:40:14 +0000560
561 # Kernel crash reports should be wiped between test runs, but
562 # may persist from earlier parts of the test, or from problems
563 # with provisioning.
564 #
565 # Kernel crash reports will NOT be present if the crash happened
566 # before encrypted stateful is mounted.
567 #
568 # TODO(dgarrett): Integrate with server/crashcollect.py at some
569 # point.
570 kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
571 if kernel_crashes:
572 rollback_message += ': kernel_crash'
573 logging.debug('Found %d kernel crash reports:',
574 len(kernel_crashes))
575 # The crash names contain timestamps that may be useful:
576 # kernel.20131207.005945.0.kcrash
577 for crash in kernel_crashes:
578 logging.debug(' %s', os.path.basename(crash))
579
580 # Print out some information to make it easier to debug
581 # the rollback.
582 logging.debug('Dumping partition table.')
583 self._run('cgpt show $(rootdev -s -d)')
584 logging.debug('Dumping crossystem for firmware debugging.')
585 self._run('crossystem --all')
586 raise ChromiumOSError(rollback_message)
587
588 # Make sure chromeos-setgoodkernel runs.
589 try:
590 utils.poll_for_condition(
Richard Barnette54d14f52018-05-18 16:39:49 +0000591 lambda: (self._get_kernel_tries(active_kernel) == 0
592 and self._get_kernel_success(active_kernel)),
Richard Barnette55d1af82018-05-22 23:40:14 +0000593 exception=ChromiumOSError(),
594 timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
595 except ChromiumOSError:
596 services_status = self._run('status system-services').stdout
597 if services_status != 'system-services start/running\n':
598 event = ('Chrome failed to reach login screen')
599 else:
600 event = ('update-engine failed to call '
601 'chromeos-setgoodkernel')
602 raise ChromiumOSError(
603 'After update and reboot, %s '
604 'within %d seconds' % (event, _KERNEL_UPDATE_TIMEOUT))
605
606
    def _prepare_host(self):
        """Make sure the target DUT is working and ready for update.

        Initially, the target DUT's state is unknown.  The DUT is
        expected to be online, but we strive to be forgiving if Chrome
        and/or the update engine aren't fully functional.

        @raises ChromiumOSError: if the update engine isn't idle once
                preparation is done (see _wait_for_update_service()).
        """
        # Before anything else, any pending stateful update request is
        # cleared, so that it's gone before the reboot below.
        #
        # Summary of the remaining work, and the rationale:
        #  1. Reboot, because it's a good way to clear out problems.
        #  2. Touch the PROVISION_FAILED file, to allow repair to detect
        #     failure later.
        #  3. Run the hook for host class specific preparation.
        #  4. Stop Chrome, because the system is designed to eventually
        #     reboot if Chrome is stuck in a crash loop.
        #  5. Force `update-engine` to start, because if Chrome failed
        #     to start properly, the status of the `update-engine` job
        #     will be uncertain.
        self._reset_stateful_partition()
        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
        self._run('touch %s' % PROVISION_FAILED)
        self.host.prepare_for_update()
        # `|| true`:  neither command is allowed to fail the preparation.
        self._run('stop ui || true')
        self._run('start update-engine || true')
        self._wait_for_update_service()
        logging.info('Updating from version %s to %s.',
                     self.host.get_release_version(),
                     self.update_version)
634
635
636 def _verify_devserver(self):
637 """Check that our chosen devserver is still working."""
638 server = 'http://%s' % urlparse.urlparse(self.update_url)[1]
639 try:
640 if not dev_server.ImageServer.devserver_healthy(server):
641 raise ChromiumOSError(
642 'Update server at %s not healthy' % server)
643 except Exception as e:
644 raise ChromiumOSError(
645 'Update server at %s is not available' % server)
646
647
    def _install_update(self):
        """Install the requested image on the DUT, but don't start it.

        This downloads all content needed for the requested update, and
        installs it in place on the DUT.  This does not reboot the DUT,
        so the update is merely pending when the method returns.

        On any failure the boot partition is reverted, the pending
        stateful update is reset, and diagnostics are collected before
        the failure is re-raised.

        @returns the kernel expected to be active after the next boot,
                 as returned by update_image().
        """
        logging.info('Installing image at %s onto %s',
                     self.update_url, self.host.hostname)
        try:
            expected_kernel = self.update_image()
            self.update_stateful()
            logging.info('Update complete.')
        # The bare `except` makes the cleanup below run for every kind
        # of failure; the `raise` at the end re-raises the original
        # exception.  If a cleanup step itself raises, that exception
        # propagates instead.
        except:
            self._revert_boot_partition()
            self._reset_stateful_partition()
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs due to failure...')
                self.host.get_file(
                        _UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            # Query the devserver for the image directory contents;
            # failures of the query itself are only logged.
            _list_image_dir_contents(self.update_url)
            raise
        return expected_kernel
Sean O'Connor5346e4e2010-08-12 18:49:24 +0200673
674
def _complete_update(self, expected_kernel):
    """Finish the update, and confirm that it succeeded.

    On entry, the target build has been downloaded and installed on
    the DUT, but has not been booted.  This method reboots the DUT
    and then checks that the new build is running successfully.

    @param expected_kernel: kernel expected to be active after reboot.
    """
    # The update flow sometimes leaves the TPM in a state that fails
    # verification, for reasons unknown.  Requesting a TPM owner
    # clear via 'crossystem' papers over that during the reboot.
    #
    # A failure of 'crossystem' is unexpected and might indicate a
    # bug, but it is ignored on purpose:  this call exists only to
    # paper over problems, so letting it fail the update would
    # defeat the purpose.
    self._run('crossystem clear_tpm_owner_request=1', ignore_status=True)
    self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)

    # Leave the lab machine file behind as a marker distinguishing
    # this image from other test images.  The autoreboot script
    # depends on that file, so it is started again whenever the file
    # had to be created.
    mark_lab_machine = (
            'FILE="%s" ; [ -f "$FILE" ] || '
            '( touch "$FILE" ; start autoreboot )' % _LAB_MACHINE_FILE)
    self._run(mark_lab_machine)

    rollback_message = (
            'Build %s failed to boot on %s; system rolled back to previous '
            'build' % (self.update_version, self.host.hostname))
    self.verify_boot_expectations(expected_kernel,
                                  rollback_message=rollback_message)

    logging.debug('Cleaning up old autotest directories.')
    try:
        stale_autodir = autotest.Autotest.get_installed_autodir(self.host)
        self._run('rm -rf ' + stale_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
718
719
def run_update(self):
    """Perform a full update of a DUT in the test lab.

    Downloads and installs the root FS and stateful partition content
    for the update described by `self.host` and `self.update_url`,
    following the requirements for provisioning a DUT to test the
    requested build.

    The steps, in order:  count the install attempt against the
    devserver, verify the devserver, prepare the host, install the
    update, then complete the update.

    @returns A tuple of the form `(image_name, attributes)`, where
        `image_name` is the name of the image installed, and
        `attributes` is new attributes to be applied to the DUT.
    """
    devserver_host = dev_server.get_hostname(self.update_url)
    install_counter = metrics.Counter('chromeos/autotest/provision/install')
    install_counter.increment(fields={'devserver': devserver_host})

    self._verify_devserver()
    self._prepare_host()
    kernel = self._install_update()
    self._complete_update(kernel)

    image = url_to_image_name(self.update_url)
    # The devserver that stages autotest packages is not necessarily
    # the one in update_url, so resolve a devserver for this image
    # and host here.
    staging_server = dev_server.ImageServer.resolve(
            image, self.host.hostname)
    package_url = tools.get_package_url(staging_server.url(), image)
    return image, {ds_constants.JOB_REPO_URL: package_url}