blob: 3e0c04f79cc111ccf4bfb593caa1becdc1f66192 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Richard Barnette90ad4262016-11-17 17:29:24 -08002# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Richard Barnette1bf22a32016-11-18 16:14:31 -08006"""
7Repair actions and verifiers relating to CrOS firmware.
8
This contains the repair actions and verifiers needed to find problems
10with the firmware installed on Chrome OS DUTs, and when necessary, to
11fix problems by updating or re-installing the firmware.
Richard Barnette077665e2016-11-29 16:00:59 -080012
13The operations in the module support two distinct use cases:
14 * DUTs used for FAFT tests can in some cases have problems with
15 corrupted firmware. The module supplies `FirmwareStatusVerifier`
Garry Wangad2a1712020-03-26 15:06:43 -070016 to check for corruption, and supplies `FaftFirmwareRepair` to
17 re-install firmware of current faft stable_version via servo
18 when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080019 * DUTs used for general testing normally should be running a
20 designated "stable" firmware version. This module supplies
21 `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version.  This
    module also supplies `GeneralFirmwareRepair` to re-install the
    firmware tied to the current stable_version image via servo when
    needed.
Richard Barnette077665e2016-11-29 16:00:59 -080025
26For purposes of the operations in the module, we distinguish three kinds
27of DUT, based on pool assignments:
28 * DUTs used for general testing. These DUTs automatically check for
29 and install the stable firmware using `FirmwareVersionVerifier`.
30 * DUTs in pools used for FAFT testing. These check for bad firmware
31 builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`.  These DUTs don't check for or install the
33 stable firmware.
34 * DUTs not in general pools, and not used for FAFT. These DUTs
35 are expected to be managed by separate processes and are excluded
36 from all of the verification and repair code in this module.
Richard Barnette1bf22a32016-11-18 16:14:31 -080037"""
38
Xixuan Wu93e646c2017-12-07 18:36:10 -080039# pylint: disable=missing-docstring
40
Derek Beckettf73baca2020-08-19 15:08:47 -070041from __future__ import absolute_import
42from __future__ import division
43from __future__ import print_function
44
Hung-Te Lina014dbc2019-11-07 16:41:42 +080045import json
Richard Barnette90ad4262016-11-17 17:29:24 -080046import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080047
48import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080049from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080050from autotest_lib.client.common_lib import hosts
51from autotest_lib.server import afe_utils
Derek Beckett3d743402021-08-04 09:25:44 -070052from autotest_lib.server import tauto_warnings
Gregory Nisbetd3007d22020-09-02 12:04:07 -070053from autotest_lib.server.hosts import cros_constants
54
Mike Frysingerba2c0df2021-01-23 00:56:47 -050055from autotest_lib.utils.frozen_chromite.lib import timeout_util
Derek Beckettf73baca2020-08-19 15:08:47 -070056import six
Richard Barnette1bf22a32016-11-18 16:14:31 -080057
58
# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
#
# The value is read from the comma-separated
# `pools_support_firmware_repair` setting in the `CROS` section of the
# global config.  Whitespace around each name is stripped and empty
# entries are dropped, so a value such as 'a, b' or one with a trailing
# comma cannot produce names that would never match a DUT's pools.
#
_FIRMWARE_REPAIR_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip())
67
68
def _is_firmware_testing_device(host):
    """
    Tell whether a host is dedicated to firmware testing.

    A host counts as a firmware testing device when at least one of the
    pools recorded in its host info is listed in
    `_FIRMWARE_REPAIR_POOLS`.  Such a DUT should be managed by
    `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but not by
    `FirmwareVersionVerifier` and `GeneralFirmwareRepair`.

    @param host  The host to be checked.
    @return `True` if the host should use `FirmwareStatusVerifier`
            and `FaftFirmwareRepair`; `False` otherwise.
    """
    host_pools = host.host_info_store.get().pools
    shared_pools = host_pools & _FIRMWARE_REPAIR_POOLS
    return bool(shared_pools)
Richard Barnette077665e2016-11-29 16:00:59 -080082
83
def _is_firmware_update_supported(host):
    """
    Tell whether a DUT should be kept on the standard stable firmware.

    DUTs used for general testing in the lab (e.g. the `bvt` pool) have
    their firmware kept up-to-date by `FirmwareVersionVerifier`.  Some
    pools follow a different firmware management policy; this function
    separates the two groups.  A host is eligible for the standard
    stable version update exactly when it is not a firmware testing
    device.

    @param host  The host to be checked for update policy.
    @return `True` if the host should use `FirmwareVersionVerifier`;
            `False` otherwise.
    """
    firmware_testing = _is_firmware_testing_device(host)
    return not firmware_testing
Richard Barnette1bf22a32016-11-18 16:14:31 -0800100
101
def _get_available_firmware(host, model):
    """Get the available RW firmware version given the model.

    Runs `chromeos-firmwareupdate --manifest` on the host and looks up
    `<entry>.host.versions.rw` in the JSON that the command prints.
    `<entry>` is `model` when the manifest has more than one top-level
    entry; a manifest with exactly one entry is used as is, whatever
    that entry is called.

    Output that cannot be used (a failing command, text that is not
    JSON, or a manifest without the expected nesting) produces `None`
    rather than an exception, so that an unexpected updater output does
    not by itself make the caller fail.

    @param host   The host to get available firmware for.
    @param model  The model name to get corresponding firmware version.
    @return The available RW firmware version if found, else, None.
    """
    result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
    if result.exit_status != 0:
        return None

    try:
        manifest = json.loads(result.stdout)
    except (TypeError, ValueError):
        # json reports malformed text as a ValueError (subclass), and
        # input that is not text at all as a TypeError.
        logging.error('Unable to parse firmware manifest as JSON.')
        return None
    if not isinstance(manifest, dict) or not manifest:
        return None

    # The manifest is a JSON in .model.host.versions.rw
    entry = model if len(manifest) > 1 else next(iter(manifest))
    node = manifest.get(entry)
    for name in ('host', 'versions', 'rw'):
        # Stop on anything that is not a mapping instead of calling
        # .get() on a list or a string.
        if not isinstance(node, dict):
            return None
        node = node.get(name)
    return node or None
Ningning Xia05af7402018-02-13 18:19:10 -0800121
122
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    Firmware on DUTs that run firmware tests can end up corrupted.
    This verifier dumps the firmware sections from the DUT and checks
    both firmware blocks, failing when either one does not verify so
    that the firmware can be re-flashed using servo.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # Hosts outside the firmware testing pools are not checked.
        if not _is_firmware_testing_device(host):
            return

        # Shell steps that read the AP firmware and dump the sections
        # that we're interested in, one file per section.
        dump_steps = [
                'mkdir /tmp/verify_firmware',
                'cd /tmp/verify_firmware',
                'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B',
                'do flashrom -p host -r -i $section:$section',
                'done',
        ]
        # Command template that verifies one firmware block; both %c
        # placeholders take the block letter.
        check_template = ' '.join([
                'vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c',
                '--signpubkey /usr/share/vboot/devkeys/root_key.vbpubk',
                '--fv /tmp/verify_firmware/FW_MAIN_%c',
        ])

        try:
            host.run('; '.join(dump_steps))

            # Verify the firmware blocks A and B.
            for block_id in ('A', 'B'):
                outcome = host.run(check_template % (block_id, block_id),
                                   ignore_status=True)
                if outcome.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % block_id)
        finally:
            # Remove the temporary files, whether or not the check passed.
            host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette90ad4262016-11-17 17:29:24 -0800162
163
class FirmwareRepair(hosts.RepairAction):
    """
    Reinstall the firmware image using servo.

    This class holds the helpers shared by the servo-based firmware
    repair actions: looking up which build to install, and the two
    install procedures (`_run_faft_repair` and `_run_general_repair`).
    It defines neither `repair()` nor `_is_applicable()`; subclasses
    decide which DUTs they apply to and which helpers to combine.
    """

    def _get_faft_stable_build(self, host):
        """Look up the faft stable version using the host's host info.

        @param host  The host to look up the version for.
        @return Whatever `afe_utils.get_stable_faft_version_v2` returns
                for the host's info.
        """
        info = host.host_info_store.get()
        return afe_utils.get_stable_faft_version_v2(info)

    def _get_os_stable_build(self, host):
        """Return the host's repair image name.

        @param host  The host to look up the build for.
        """
        # Use firmware in current stable os build.
        return host.get_cros_repair_image_name()

    def _run_faft_repair(self, host, build):
        """Install the firmware of `build` with `host.firmware_install`.

        @param host   The host to be repaired.
        @param build  The build to install firmware from.
        """
        host.firmware_install(build)

    def _run_general_repair(self, host, build):
        """Flash the firmware found in the image preloaded on the usbkey.

        After flashing, the DUT is cold reset through servo, the GBB
        flags and the dev mode request are cleared, and the DUT is
        rebooted.

        @param host   The host to be repaired.
        @param build  The build expected to be preloaded on the usbkey.
        @raises hosts.AutoservRepairError if `build` is not the image on
                the usbkey, or if a signed firmware image is present
                while the DUT's phase is EVT, DVT, or missing.
        """
        # As GeneralFirmwareRepair is the last repair action, we expect
        # stable_version os image is loaded on usbkey during other repair
        # action runs. And there is also no point to repeat and waste time if
        # download image to usbkey failed in other repair actions.
        if host._servo_host.validate_image_usbkey() != build:
            raise hosts.AutoservRepairError('%s is expected to be preloaded,'
                      'however it\'s not found on the usbkey' % build,
                      'image not loaded on usbkey')
        ec_image, bios_image = host._servo_host.prepare_repair_firmware_image()

        # For devices in the EVT or DVT phase (or with no phase in the
        # inventory) we skip this repair when a signed variant exists,
        # as it's hard to decide which image to use if the DUT does not
        # boot.  `bios_image` is checked for truth first: the flashing
        # code below treats it as optional, and `'signed' in None`
        # would raise a TypeError.
        info = host.host_info_store.get()
        phase = info.get_label_value('phase')
        if (bios_image and 'signed' in bios_image
                    and phase.lower() in ('evt', 'dvt', '')):
            raise hosts.AutoservRepairError(
                    'Could not determine which firmware image to use'
                    ' due to signed firmware image variant exists but'
                    ' DUT phase is earlier than PVT or missing; Phase'
                    ' from inventory: %s' % phase,
                    'Can not determine variant for EVT device')

        # Before flash firmware we want update the build into health profile.
        if host.health_profile:
            host.health_profile.set_firmware_stable_version(build)

        if ec_image:
            logging.info('Attempting to flash ec firmware...')
            host.servo.program_ec(ec_image, copy_image=False)
        if bios_image:
            logging.info('Attempting to flash bios firmware...')
            host._servo_host.flash_ap_firmware_via_servo(bios_image)

        logging.info('Cold resetting DUT through servo...')
        host.servo.get_power_state_controller().reset()
        host.wait_up(timeout=host.BOOT_TIMEOUT)
        # flash firmware via servo will turn DUT into dev mode, so disable
        # dev mode and reset gbb flag here.
        host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0', ignore_status=True)
        host.run('crossystem disable_dev_request=1', ignore_status=True)
        host.reboot()
Garry Wang61cfe0b2020-08-21 16:26:00 -0700227
Garry Wang8d166092020-10-23 16:44:14 -0700228
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pool.

    The faft stable version is installed when one is found.  Otherwise
    the repair falls back to the firmware in the OS stable version, and
    fails when that cannot be found either.
    """

    def repair(self, host):
        tauto_warnings.lab_services_warn_and_error("Require servo", err=False)

        # Preferred path: the faft stable version.
        faft_build = self._get_faft_stable_build(host)
        if faft_build:
            self._run_faft_repair(host, faft_build)
            return

        # Fallback path: the firmware shipped in the OS stable version.
        logging.info('Cannot find faft stable_version, falling back to'
                     ' use firmware on OS stable_version.')
        os_build = self._get_os_stable_build(host)
        if os_build:
            self._run_general_repair(host, os_build)
            return
        raise hosts.AutoservRepairError(
                'Failed to find stable_version from host_info.',
                'cannot find stable_version')

    def _is_applicable(self, host):
        # Only DUTs dedicated to firmware testing use this action.
        return _is_firmware_testing_device(host)

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'
255
256
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    The verifier makes sure that the DUT either already runs the
    assigned stable firmware, or can be brought to it using the
    firmware shipped in the currently running build.

    Failure occurs when all of the following apply:
      1. The DUT is not excluded from updates.  For example, DUTs used
         for FAFT testing use `FaftFirmwareRepair` instead.
      2. The DUT's board has an assigned stable firmware version.
      3. The DUT is not running the assigned stable firmware.
      4. The firmware supplied in the running OS build is not the
         assigned stable firmware.

    When an upgrade is needed and the running OS build supplies the
    required firmware, the verifier installs it with
    `chromeos-firmwareupdate`.  A failed installation makes the
    verifier fail.

    This nominally breaks the rule that "verifiers must succeed
    quickly", because `reboot()` can be invoked on the success path.
    It is done anyway for two reasons:
      * Updates are typically months apart, so the amortized cost is
        low.
      * Repair is kept separate from verify so that work can be
        rescheduled right away while an expensive repair runs
        out-of-band.  A firmware update is likely to hit all DUTs at
        once, so handing the work to repair gains nothing.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """Return the output of `crossystem fwid`, or None if it fails."""
        fwid = host.run('crossystem fwid', ignore_status=True)
        return fwid.stdout if fwid.exit_status == 0 else None

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        A firmware version string looks like this:
            Google_Gnawty.5216.239.34
        The text before the first '.' names the hardware the firmware
        was built for.  That text must be the same for `version_a` and
        `version_b`.

        This is a confidence check that keeps us from installing the
        wrong firmware on a DUT when a board label has somehow gone
        astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.
        @raises hosts.AutoservVerifyError if the hardware names differ.
        """
        hardware_a, hardware_b = (
                version.split('.')[0] for version in (version_a, version_b))
        if hardware_a == hardware_b:
            return
        raise hosts.AutoservVerifyError(
                'Hardware/Firmware mismatch updating %s to %s'
                % (version_a, version_b))

    def _is_stable_image_installed(self, host):
        """Verify that ChromeOS image on host is a stable version.

        The firmware is only installed from the stable image, which
        protects us from a bad, broken or untested image that may carry
        a broken updater or corrupted firmware.

        A version is represented like:
            nocturne-release/R89-13728.0.0
        The version reported by the host is compared with the repair
        image name reported for the host.

        @param host  CrosHost instance.
        @raises hosts.AutoservNonCriticalVerifyError if the two differ.
        """
        running_os = host.get_release_builder_path()
        stable_os = host.get_cros_repair_image_name()
        if running_os == stable_os:
            return
        raise hosts.AutoservNonCriticalVerifyError(
                'Firmware update can be run only from stable image.'
                ' Expected version:"%s", actually: "%s"' %
                (stable_os, running_os))

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return

        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        try:
            target_fw = afe_utils.get_stable_firmware_version_v2(info)
        except Exception as e:
            # A failed lookup is treated like "no version assigned".
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', e)
            target_fw = None

        if target_fw is None:
            # This DUT doesn't have a firmware update target
            logging.debug('Expected FW version not found')
            return
        logging.debug('Expected FW version: %s', target_fw)

        # For tests 3 and 4: If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        running_fw = self._get_rw_firmware(host)
        if running_fw is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        logging.debug('Current FW version: %s', running_fw)
        if running_fw == target_fw:
            return

        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        bundled_fw = _get_available_firmware(host, info.model)
        if bundled_fw is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        self._is_stable_image_installed(host)
        if bundled_fw != target_fw:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (running_fw, target_fw))

        # Time to update the firmware.
        logging.info('Updating firmware from %s to %s',
                     running_fw, target_fw)
        self._check_hardware_match(running_fw, target_fw)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            update_failed = ('chromeos-firmwareupdate failed: from '
                             '%s to %s')
            logging.exception(update_failed, running_fw, target_fw)
            raise hosts.AutoservVerifyError(
                    update_failed % (running_fw, target_fw))

        # Confirm that the DUT came back on the expected firmware.
        rebooted_fw = self._get_rw_firmware(host)
        if rebooted_fw != target_fw:
            wrong_version = ('chromeos-firmwareupdate failed: tried upgrade '
                             'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    wrong_version % (target_fw, rebooted_fw))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'