blob: 227e9330b4630d6b479a26b96877443b10d32125 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Richard Barnette90ad4262016-11-17 17:29:24 -08002# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Richard Barnette1bf22a32016-11-18 16:14:31 -08006"""
7Repair actions and verifiers relating to CrOS firmware.
8
This contains the repair actions and verifiers needed to find problems
with the firmware installed on Chrome OS DUTs, and when necessary, to
fix problems by updating or re-installing the firmware.
Richard Barnette077665e2016-11-29 16:00:59 -080012
13The operations in the module support two distinct use cases:
14 * DUTs used for FAFT tests can in some cases have problems with
15 corrupted firmware. The module supplies `FirmwareStatusVerifier`
Garry Wangad2a1712020-03-26 15:06:43 -070016 to check for corruption, and supplies `FaftFirmwareRepair` to
17 re-install firmware of current faft stable_version via servo
18 when needed.
  * DUTs used for general testing normally should be running a
    designated "stable" firmware version.  This module supplies
    `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version.  This
    module also supplies `GeneralFirmwareRepair` to re-install the
    firmware that is tied to the current stable_version image via
    servo when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080025
26For purposes of the operations in the module, we distinguish three kinds
27of DUT, based on pool assignments:
28 * DUTs used for general testing. These DUTs automatically check for
29 and install the stable firmware using `FirmwareVersionVerifier`.
  * DUTs in pools used for FAFT testing.  These check for bad firmware
    builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`.  These DUTs don't check for or install the
    stable firmware.
34 * DUTs not in general pools, and not used for FAFT. These DUTs
35 are expected to be managed by separate processes and are excluded
36 from all of the verification and repair code in this module.
Richard Barnette1bf22a32016-11-18 16:14:31 -080037"""
38
Xixuan Wu93e646c2017-12-07 18:36:10 -080039# pylint: disable=missing-docstring
40
Derek Beckettf73baca2020-08-19 15:08:47 -070041from __future__ import absolute_import
42from __future__ import division
43from __future__ import print_function
44
Hung-Te Lina014dbc2019-11-07 16:41:42 +080045import json
Richard Barnette90ad4262016-11-17 17:29:24 -080046import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080047
48import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080049from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080050from autotest_lib.client.common_lib import hosts
51from autotest_lib.server import afe_utils
Richard Barnette3245ae22018-08-31 11:50:08 -070052from autotest_lib.server.hosts import repair_utils
Gregory Nisbetd3007d22020-09-02 12:04:07 -070053from autotest_lib.server.hosts import cros_constants
54
55from chromite.lib import timeout_util
Derek Beckettf73baca2020-08-19 15:08:47 -070056import six
Richard Barnette1bf22a32016-11-18 16:14:31 -080057
58
# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
#
# The config value is a comma-separated list.  Whitespace around each
# name is stripped and empty entries are dropped, so that a value
# written as "pool-a, pool-b" still matches the pool labels exactly.
#
_FIRMWARE_REPAIR_POOLS = {
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip()
}
67
68
def _is_firmware_testing_device(host):
    """
    Check if a host is dedicated for firmware testing.

    When this function returns true, the DUT should be managed by
    `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but not
    `FirmwareVersionVerifier` and `GeneralFirmwareRepair`.

    @param host  The host to be checked; its pools are read from
                 `host.host_info_store`.
    @return A true value if the host should use `FirmwareStatusVerifier`
            and `FaftFirmwareRepair`; a false value otherwise.
    """
    info = host.host_info_store.get()
    # The host qualifies if it is in at least one firmware repair pool.
    return bool(info.pools & _FIRMWARE_REPAIR_POOLS)
Richard Barnette077665e2016-11-29 16:00:59 -080082
83
def _is_firmware_update_supported(host):
    """
    Return whether a DUT should be running the standard firmware.

    In the test lab, DUTs used for general testing, (e.g. the `bvt`
    pool) need their firmware kept up-to-date with
    `FirmwareVersionVerifier`.  However, some pools have alternative
    policies for firmware management.  This returns whether a given DUT
    should be updated via the standard stable version update, or
    managed by some other procedure.

    @param host  The host to be checked for update policy.
    @return A true value if the host should use
            `FirmwareVersionVerifier`; a false value otherwise.
    """
    # Firmware testing devices are the only hosts excluded from the
    # standard update.
    return not _is_firmware_testing_device(host)
Richard Barnette1bf22a32016-11-18 16:14:31 -0800100
101
def _get_available_firmware(host, model):
    """Get the available RW firmware version given the model.

    Runs `chromeos-firmwareupdate --manifest` on the host and looks up
    the `<model> -> host -> versions -> rw` entry of the JSON manifest
    it prints.  When the manifest has exactly one top-level entry, that
    entry is used regardless of `model`.

    Output that cannot be interpreted (a failing command, invalid JSON,
    or a manifest without the expected structure) yields None rather
    than an exception.

    @param host  The host to get available firmware for.
    @param model  The model name to get corresponding firmware version.
    @return The available RW firmware version if found, else, None.
    """
    result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
    if result.exit_status != 0:
        return None

    try:
        data = json.loads(result.stdout)
    except ValueError:
        logging.error('Firmware manifest is not valid JSON.')
        return None
    if not data or not isinstance(data, dict):
        return None

    # A manifest with one entry describes the only possible firmware,
    # whatever name it is listed under.
    key = model if len(data) > 1 else next(iter(data))
    # Walk explicit keys rather than a dotted path, so that a model
    # name containing '.' is still looked up as a single key.
    for k in (key, 'host', 'versions', 'rw'):
        if not isinstance(data, dict):
            return None
        data = data.get(k)
    return data or None
Ningning Xia05af7402018-02-13 18:19:10 -0800121
122
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    For DUTs that run firmware tests, it's possible that the firmware
    on the DUT can get corrupted.  This verifier checks whether it
    appears that firmware should be re-flashed using servo.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """
        Dump the firmware sections from flash and verify slots A and B.

        Hosts that are not firmware testing devices are skipped.

        @param host  The host whose firmware is to be checked.
        @raises AutoservVerifyError if `vbutil_firmware` rejects either
                firmware slot.
        """
        if not _is_firmware_testing_device(host):
            return
        try:
            # Read the AP firmware and dump the sections that we're
            # interested in.  The steps are chained with `&&` so that
            # flashrom never dumps into an unexpected directory when
            # the scratch directory can't be created or entered.
            cmd = ('mkdir -p /tmp/verify_firmware && '
                   'cd /tmp/verify_firmware && '
                   'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B; '
                   'do flashrom -p host -r -i $section:$section; '
                   'done')
            host.run(cmd)

            # Verify the firmware blocks A and B.
            cmd = ('vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c'
                   ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                   ' --fv /tmp/verify_firmware/FW_MAIN_%c')
            for c in ('A', 'B'):
                rv = host.run(cmd % (c, c), ignore_status=True)
                if rv.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % c)
        finally:
            # Remove the temporary files.  A non-zero exit status here
            # is ignored so that it can't replace the verification
            # result.
            host.run('rm -rf /tmp/verify_firmware', ignore_status=True)

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette90ad4262016-11-17 17:29:24 -0800162
163
class FirmwareRepair(hosts.RepairAction):
    """
    Reinstall the firmware image using servo.

    This is the common base for the servo-based firmware repair
    actions.  `repair()` asks the subclass which build to install via
    `_get_stable_build()` and delegates the installation itself to
    `_run_repair()`; subclasses must implement both.
    """

    def _get_stable_build(self, host):
        """
        Return the build whose firmware should be installed.

        @param host  The host to be repaired.
        @return The build to install; a false value means no build
                could be determined.
        """
        raise NotImplementedError(
                'Class %s does not implement _get_stable_build()'
                % type(self).__name__)

    def _run_repair(self, host, build):
        """
        Install the firmware from `build` onto `host`.

        @param host  The host to be repaired.
        @param build  The value returned by `_get_stable_build()`.
        """
        raise NotImplementedError(
                'Class %s does not implement _run_repair()'
                % type(self).__name__)

    def repair(self, host):
        """
        Re-install firmware on `host` via servo.

        @param host  The host to be repaired.
        @raises AutoservRepairError if no stable build can be found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        build = self._get_stable_build(host)
        if not build:
            raise hosts.AutoservRepairError(
                    'Failed to find stable firmware build for %s, if the DUT is'
                    ' in faft-*pool, faft stable_version needs to be set.'
                    % host.hostname, 'cannot find firmware stable_version')
        self._run_repair(host, build)
Richard Barnette077665e2016-11-29 16:00:59 -0800192
Richard Barnette077665e2016-11-29 16:00:59 -0800193
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pool.

    The build comes from the faft stable version looked up for the
    host, and is installed with `host.firmware_install()`.
    """

    def _get_stable_build(self, host):
        info = host.host_info_store.get()
        return afe_utils.get_stable_faft_version_v2(info)

    def _run_repair(self, host, build):
        host.firmware_install(build)

    def _is_applicable(self, host):
        # Only firmware testing devices use this action.
        return _is_firmware_testing_device(host)

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700211
212
class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    We need a different RepairAction for non firmware testing DUTs
    because we only want to try re-installing firmware if all other
    RepairActions could not restore ssh capability to the DUT.
    """

    def _get_stable_build(self, host):
        # Use firmware in current stable os build.
        return host.get_cros_repair_image_name()

    def _run_repair(self, host, build):
        """
        Flash EC and BIOS firmware via servo, then cold reset the DUT.

        @param host  The host to be repaired.
        @param build  The image name expected to be on the usbkey.
        @raises AutoservRepairError if the usbkey does not hold `build`.
        """
        # As GeneralFirmwareRepair is the last repair action, we expect
        # stable_version os image is loaded on usbkey during other repair
        # action runs. And there is also no point to repeat and waste time if
        # download image to usbkey failed in other repair actions.
        if host._servo_host.validate_image_usbkey() != build:
            raise hosts.AutoservRepairError(
                    '%s is expected to be preloaded, '
                    'however it\'s not found on the usbkey' % build,
                    'image not loaded on usbkey')
        ec_image, bios_image = host._servo_host.prepare_repair_firmware_image()
        # Either image may be absent; flash only what was prepared.
        if ec_image:
            logging.info('Attempting to flash ec firmware...')
            host.servo.program_ec(ec_image, copy_image=False)
        if bios_image:
            logging.info('Attempting to flash bios firmware...')
            host.servo.program_bios(bios_image, copy_image=False)

        logging.info('Cold resetting DUT through servo...')
        host.servo.get_power_state_controller().reset()
        host.wait_up(timeout=host.BOOT_TIMEOUT)

    def _is_applicable(self, host):
        # Firmware testing devices use `FaftFirmwareRepair` instead.
        return not _is_firmware_testing_device(host)

    @property
    def description(self):
        return 'Re-install the stable firmware(non-faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700250
251
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    This verifier checks to ensure that either the firmware on the DUT
    is up-to-date, or that the target firmware can be installed from the
    currently running build.

    Failure occurs when all of the following apply:
      1. The DUT is not excluded from updates.  For example, DUTs used
         for FAFT testing use `FaftFirmwareRepair` instead.
      2. The DUT's board has an assigned stable firmware version.
      3. The DUT is not running the assigned stable firmware.
      4. The firmware supplied in the running OS build is not the
         assigned stable firmware.

    If the DUT needs an upgrade and the currently running OS build
    supplies the necessary firmware, the verifier installs the new
    firmware using `chromeos-firmwareupdate`.  Failure to install will
    cause the verifier to fail.

    This verifier nominally breaks the rule that "verifiers must succeed
    quickly", since it can invoke `reboot()` during the success code
    path.  We're doing it anyway for two reasons:
      * The time between updates will typically be measured in months,
        so the amortized cost is low.
      * The reason we distinguish repair from verify is to allow
        rescheduling work immediately while the expensive repair happens
        out-of-band.  But a firmware update will likely hit all DUTs at
        once, so it's pointless to pass the buck to repair.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the firmware id reported by `crossystem fwid`.

        @param host  The host to query.
        @return The firmware id with surrounding whitespace removed, or
                None if the command fails.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status != 0:
            return None
        # Strip so that a stray trailing newline can't make an
        # up-to-date DUT compare unequal to its stable version.
        return result.stdout.strip()

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        The part before the numbers identifies the hardware for which
        the firmware was built.  This function checks that the hardware
        identified by `version_a` and `version_b` is the same.

        This is a sanity check to protect us from installing the wrong
        firmware on a DUT when a board label has somehow gone astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.
        @raises AutoservVerifyError if the hardware parts differ.
        """
        hardware_a = version_a.split('.')[0]
        hardware_b = version_b.split('.')[0]
        if hardware_a != hardware_b:
            message = 'Hardware/Firmware mismatch updating %s to %s'
            raise hosts.AutoservVerifyError(
                    message % (version_a, version_b))

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """
        Check the DUT's firmware version, updating it when possible.

        @param host  The host to be verified.
        @raises AutoservVerifyError if the model label is missing, if
                the running OS build can't supply the stable firmware,
                or if the update fails.
        """
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return
        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        stable_firmware = None
        try:
            stable_firmware = afe_utils.get_stable_firmware_version_v2(info)
        except Exception as e:
            # A failed lookup is treated like "no update target".
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', e)

        if stable_firmware is None:
            # This DUT doesn't have a firmware update target
            return

        # For tests 3 and 4:  If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        if current_firmware == stable_firmware:
            return
        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))
        # Time to update the firmware.
        logging.info('Updating firmware from %s to %s',
                     current_firmware, stable_firmware)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, current_firmware, stable_firmware)
            raise hosts.AutoservVerifyError(
                    message % (current_firmware, stable_firmware))
        # Confirm that the DUT actually came back on the new firmware.
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'