blob: 4119c0ef1096f8a92db12a08f2ba6e9cf4e63d74 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Richard Barnette90ad4262016-11-17 17:29:24 -08002# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Richard Barnette1bf22a32016-11-18 16:14:31 -08006"""
7Repair actions and verifiers relating to CrOS firmware.
8
This contains the repair actions and verifiers needed to find problems
10with the firmware installed on Chrome OS DUTs, and when necessary, to
11fix problems by updating or re-installing the firmware.
Richard Barnette077665e2016-11-29 16:00:59 -080012
13The operations in the module support two distinct use cases:
14 * DUTs used for FAFT tests can in some cases have problems with
15 corrupted firmware. The module supplies `FirmwareStatusVerifier`
Garry Wangad2a1712020-03-26 15:06:43 -070016 to check for corruption, and supplies `FaftFirmwareRepair` to
17 re-install firmware of current faft stable_version via servo
18 when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080019 * DUTs used for general testing normally should be running a
20 designated "stable" firmware version. This module supplies
21 `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version. This module
    also supplies `GeneralFirmwareRepair` to re-install the firmware that
    is tied to the current stable_version image via servo when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080025
26For purposes of the operations in the module, we distinguish three kinds
27of DUT, based on pool assignments:
28 * DUTs used for general testing. These DUTs automatically check for
29 and install the stable firmware using `FirmwareVersionVerifier`.
30 * DUTs in pools used for FAFT testing. These check for bad firmware
31 builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`. These DUTs don't check for or install the
33 stable firmware.
34 * DUTs not in general pools, and not used for FAFT. These DUTs
35 are expected to be managed by separate processes and are excluded
36 from all of the verification and repair code in this module.
Richard Barnette1bf22a32016-11-18 16:14:31 -080037"""
38
Xixuan Wu93e646c2017-12-07 18:36:10 -080039# pylint: disable=missing-docstring
40
Derek Beckettf73baca2020-08-19 15:08:47 -070041from __future__ import absolute_import
42from __future__ import division
43from __future__ import print_function
44
Hung-Te Lina014dbc2019-11-07 16:41:42 +080045import json
Richard Barnette90ad4262016-11-17 17:29:24 -080046import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080047
48import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080049from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080050from autotest_lib.client.common_lib import hosts
51from autotest_lib.server import afe_utils
Richard Barnette3245ae22018-08-31 11:50:08 -070052from autotest_lib.server.hosts import repair_utils
Gregory Nisbetd3007d22020-09-02 12:04:07 -070053from autotest_lib.server.hosts import cros_constants
54
Mike Frysingerba2c0df2021-01-23 00:56:47 -050055from autotest_lib.utils.frozen_chromite.lib import timeout_util
Derek Beckettf73baca2020-08-19 15:08:47 -070056import six
Richard Barnette1bf22a32016-11-18 16:14:31 -080057
58
# _FIRMWARE_REPAIR_POOLS - The set of pools whose DUTs are dedicated to
# firmware testing, and are therefore managed by `FirmwareStatusVerifier`
# and `FaftFirmwareRepair`.
#
# The value is read from the comma-separated `pools_support_firmware_repair`
# setting in the `CROS` section of the global config.  Whitespace around
# each pool name is stripped and empty entries are dropped, so that a
# value such as 'a, b,' still yields exactly the pool names 'a' and 'b'.
#
_FIRMWARE_REPAIR_POOLS = {
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip()
}
67
68
def _is_firmware_testing_device(host):
    """
    Tell whether a host is dedicated to firmware testing.

    A host counts as a firmware testing device when at least one of the
    pools recorded in its host info is listed in `_FIRMWARE_REPAIR_POOLS`.
    Such a DUT should be managed by `FirmwareStatusVerifier` and
    `FaftFirmwareRepair`, but not by `FirmwareVersionVerifier` and
    `GeneralFirmwareRepair`.

    @param host  The host to be checked.
    @return `True` if the host should use `FirmwareStatusVerifier`
            and `FaftFirmwareRepair`; `False` otherwise.
    """
    host_pools = host.host_info_store.get().pools
    shared_pools = host_pools & _FIRMWARE_REPAIR_POOLS
    return bool(shared_pools)
Richard Barnette077665e2016-11-29 16:00:59 -080082
83
def _is_firmware_update_supported(host):
    """
    Tell whether a DUT should be kept on the standard stable firmware.

    DUTs used for general testing in the lab (e.g. the `bvt` pool) have
    their firmware kept up-to-date by `FirmwareVersionVerifier`.  Pools
    dedicated to firmware testing follow a different firmware management
    policy, so DUTs in those pools are excluded from the standard stable
    version update.

    @param host  The host to be checked for update policy.
    @return `True` if the host should use `FirmwareVersionVerifier`;
            `False` otherwise.
    """
    if _is_firmware_testing_device(host):
        # Firmware testing devices are managed by a separate procedure.
        return False
    return True
Richard Barnette1bf22a32016-11-18 16:14:31 -0800100
101
Ningning Xia05af7402018-02-13 18:19:10 -0800102def _get_available_firmware(host, model):
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800103 """Get the available RW firmware version given the model.
Ningning Xia05af7402018-02-13 18:19:10 -0800104
105 @param host The host to get available firmware for.
106 @param model The model name to get corresponding firmware version.
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800107 @return The available RW firmware version if found, else, None.
Ningning Xia05af7402018-02-13 18:19:10 -0800108 """
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800109 result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
Ningning Xia05af7402018-02-13 18:19:10 -0800110
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800111 if result.exit_status != 0:
112 return None
Ningning Xia05af7402018-02-13 18:19:10 -0800113
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800114 # The manifest is a JSON in .model.host.versions.rw
115 data = json.loads(result.stdout) or {}
Derek Beckettf73baca2020-08-19 15:08:47 -0700116 key = model if len(data) > 1 else next(six.iterkeys(data), '')
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800117 key += '.host.versions.rw'
118 for k in key.split('.'):
119 data = data.get(k, {})
120 return data or None
Ningning Xia05af7402018-02-13 18:19:10 -0800121
122
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Check that the firmware installed on a host is not corrupted.

    DUTs that run firmware tests can end up with corrupted firmware.
    This verifier dumps the firmware sections from flash and validates
    both firmware slots; a failure indicates that the firmware should be
    re-flashed using servo.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # Only DUTs dedicated to firmware testing are checked.
        if not _is_firmware_testing_device(host):
            return

        # Reads the AP firmware and dumps the sections of interest into
        # a scratch directory on the DUT.
        dump_sections = (
                'mkdir /tmp/verify_firmware; '
                'cd /tmp/verify_firmware; '
                'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B; '
                'do flashrom -p host -r -i $section:$section; '
                'done')
        # Validates one slot's vblock against its firmware body.
        verify_slot = (
                'vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c'
                ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                ' --fv /tmp/verify_firmware/FW_MAIN_%c')
        try:
            host.run(dump_sections)
            for slot in ('A', 'B'):
                outcome = host.run(verify_slot % (slot, slot),
                                   ignore_status=True)
                if outcome.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % slot)
        finally:
            # Always clean up the dumped sections, pass or fail.
            host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette90ad4262016-11-17 17:29:24 -0800162
163
class FirmwareRepair(hosts.RepairAction):
    """
    Base class for repair actions that reinstall firmware using servo.

    This class supplies the helpers shared by `FaftFirmwareRepair` and
    `GeneralFirmwareRepair`: looking up the build to install, and the
    two procedures for installing it.  It defines no `repair()` of its
    own; each subclass decides which build and which procedure to use.
    """

    def _get_faft_stable_build(self, host):
        """Return the faft stable version looked up from the host's info.

        @param host  The host being repaired.
        """
        info = host.host_info_store.get()
        return afe_utils.get_stable_faft_version_v2(info)

    def _get_os_stable_build(self, host):
        """Return the name of the host's current stable OS repair image.

        @param host  The host being repaired.
        """
        # Use firmware in current stable os build.
        return host.get_cros_repair_image_name()

    def _run_faft_repair(self, host, build):
        """Install firmware from `build` via `host.firmware_install()`.

        @param host   The host being repaired.
        @param build  The build to install firmware from.
        """
        host.firmware_install(build)

    def _run_general_repair(self, host, build):
        """Flash the firmware found in the OS image on the servo usbkey.

        @param host   The host being repaired.
        @param build  The OS build expected to be loaded on the usbkey.

        @raises AutoservRepairError if `build` is not the image on the
                usbkey, or if a signed bios image variant exists while
                the DUT phase is 'evt', 'dvt' or missing.
        """
        # As GeneralFirmwareRepair is the last repair action, we expect
        # stable_version os image is loaded on usbkey during other repair
        # action runs. And there is also no point to repeat and waste time if
        # download image to usbkey failed in other repair actions.
        if host._servo_host.validate_image_usbkey() != build:
            raise hosts.AutoservRepairError(
                    '%s is expected to be preloaded, '
                    'however it\'s not found on the usbkey' % build,
                    'image not loaded on usbkey')
        ec_image, bios_image = host._servo_host.prepare_repair_firmware_image()

        # For EVT/DVT devices (or devices without a phase label) where a
        # signed variant exists we skip this repair, as it's hard to decide
        # which image to use if the DUT does not boot.  `bios_image` is
        # optional (see the flashing step below), so only inspect its name
        # when there is one.
        info = host.host_info_store.get()
        phase = info.get_label_value('phase')
        if (bios_image and 'signed' in bios_image
                    and phase.lower() in ('evt', 'dvt', '')):
            raise hosts.AutoservRepairError(
                    'Could not determine which firmware image to use'
                    ' due to signed firmware image variant exists but'
                    ' DUT phase is earlier than PVT or missing; Phase'
                    ' from inventory: %s' % phase,
                    'Can not determine variant for EVT device')

        # Before flashing firmware, record the build in the health profile.
        if host.health_profile:
            host.health_profile.set_firmware_stable_version(build)

        if ec_image:
            logging.info('Attempting to flash ec firmware...')
            host.servo.program_ec(ec_image, copy_image=False)
        if bios_image:
            logging.info('Attempting to flash bios firmware...')
            host._servo_host.flash_ap_firmware_via_servo(bios_image)

        logging.info('Cold resetting DUT through servo...')
        host.servo.get_power_state_controller().reset()
        host.wait_up(timeout=host.BOOT_TIMEOUT)
        # flash firmware via servo will turn DUT into dev mode, so disable
        # dev mode and reset gbb flag here.
        host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0', ignore_status=True)
        host.run('crossystem disable_dev_request=1', ignore_status=True)
        host.reboot()
Garry Wang61cfe0b2020-08-21 16:26:00 -0700227
Garry Wang8d166092020-10-23 16:44:14 -0700228
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall firmware on DUTs that belong to a faft related pool.

    The faft stable_version firmware is preferred; when none is
    assigned, the firmware of the OS stable_version is used instead.
    """

    def repair(self, host):
        repair_utils.require_servo(host, ignore_state=True)

        faft_build = self._get_faft_stable_build(host)
        if faft_build:
            self._run_faft_repair(host, faft_build)
            return

        # No faft stable_version: fall back to the OS image's firmware.
        logging.info('Cannot find faft stable_version, falling back to'
                     ' use firmware on OS stable_version.')
        os_build = self._get_os_stable_build(host)
        if not os_build:
            raise hosts.AutoservRepairError(
                    'Failed to find stable_version from host_info.',
                    'cannot find stable_version')
        self._run_general_repair(host, os_build)

    def _is_applicable(self, host):
        # Applies only to DUTs dedicated to firmware testing.
        return _is_firmware_testing_device(host)

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'
255
256
class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    A separate RepairAction is needed for DUTs that are not dedicated to
    firmware testing, because we only want to try re-installing firmware
    when all other RepairActions could not restore ssh capability to the
    DUT.
    """

    def repair(self, host):
        """Flash the firmware from the host's stable OS image via servo.

        @param host  The host to be repaired.

        @raises AutoservRepairError if no stable OS build can be found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        build = self._get_os_stable_build(host)
        if not build:
            raise hosts.AutoservRepairError(
                    'Failed to find stable_version from host_info.',
                    'cannot find stable_version')
        self._run_general_repair(host, build)

    def _is_applicable(self, host):
        """Decide whether flashing firmware is worth attempting.

        @param host  The host to be checked.
        @return `True` only if the host is not a firmware testing device,
                has a usable servo and a health profile, has failed
                repair at least twice, and the candidate build has not
                already been flashed successfully or failed more than
                twice.
        """
        if _is_firmware_testing_device(host):
            return False
        if not host.servo:
            logging.info(
                    'The current servo state of %s is not met the'
                    ' minimum requirement to flash firmware.', host.hostname)
            # The repair itself drives `host.servo`, so without one there
            # is nothing this action could do.
            return False
        # Flashing firmware via servo is considered an expensive operation,
        # so we check data from previous repairs to determine whether a
        # firmware repair is needed.
        dhp = host.health_profile
        if not dhp:
            logging.info('Device health profile is not available, cannot'
                         ' determine if firmware repair is needed.')
            return False
        repair_fail_count = dhp.get_repair_fail_count()
        if repair_fail_count < 2:
            # We want to start with a more conservative strategy, so only try
            # this action on DUTs that failed repair at least twice.
            # @TODO(xianuowang@) adjust or remove this threshold.
            logging.info(
                    'Firmware repair will only applies to DUT that'
                    ' failed at least two AdminRepair, current fail'
                    ' count: %s', repair_fail_count)
            return False
        flashed_build = dhp.get_firmware_stable_version()
        candidate_build = self._get_os_stable_build(host)
        # If we had a successful firmware flash in this repair loop,
        # there is no need to retry flashing the same firmware build.
        if (dhp.get_succeed_repair_action(self.tag) > 0
                    and flashed_build == candidate_build):
            logging.info(
                    'Firmware from %s has been already installed on %s,'
                    ' no need to retry.', flashed_build, host.hostname)
            return False
        # Likewise give up on a build that has already failed repeatedly.
        if (dhp.get_failed_repair_action(self.tag) > 2
                    and flashed_build == candidate_build):
            logging.info(
                    'Firmware from %s has been attempted and failed 3 '
                    'times, no need to retry.', flashed_build)
            return False
        return True

    @property
    def description(self):
        return 'Re-install the stable firmware(non-faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700319
320
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    This verifier checks to ensure that either the firmware on the DUT
    is up-to-date, or that the target firmware can be installed from the
    currently running build.

    Failure occurs when all of the following apply:
      1. The DUT is not excluded from updates.  For example, DUTs used
         for FAFT testing use `FirmwareRepair` instead.
      2. The DUT's board has an assigned stable firmware version.
      3. The DUT is not running the assigned stable firmware.
      4. The firmware supplied in the running OS build is not the
         assigned stable firmware.

    If the DUT needs an upgrade and the currently running OS build
    supplies the necessary firmware, the verifier installs the new
    firmware using `chromeos-firmwareupdate`.  Failure to install will
    cause the verifier to fail.

    This verifier nominally breaks the rule that "verifiers must succeed
    quickly", since it can invoke `reboot()` during the success code
    path.  We're doing it anyway for two reasons:
      * The time between updates will typically be measured in months,
        so the amortized cost is low.
      * The reason we distinguish repair from verify is to allow
        rescheduling work immediately while the expensive repair happens
        out-of-band.  But a firmware update will likely hit all DUTs at
        once, so it's pointless to pass the buck to repair.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the firmware version reported by `crossystem fwid`.

        Surrounding whitespace (such as a trailing newline) is removed
        so that the value can be compared directly against a version
        string.

        @param host  The host to query.
        @return The reported version, or `None` if the command failed or
                printed nothing.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status != 0:
            return None
        return result.stdout.strip() or None

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        The part before the numbers identifies the hardware for which
        the firmware was built.  This function checks that the hardware
        identified by `version_a` and `version_b` is the same.

        This is a sanity check to protect us from installing the wrong
        firmware on a DUT when a board label has somehow gone astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.

        @raises AutoservVerifyError if the hardware parts differ.
        """
        hardware_a = version_a.split('.')[0]
        hardware_b = version_b.split('.')[0]
        if hardware_a != hardware_b:
            message = 'Hardware/Firmware mismatch updating %s to %s'
            raise hosts.AutoservVerifyError(
                    message % (version_a, version_b))

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return
        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        stable_firmware = None
        try:
            stable_firmware = afe_utils.get_stable_firmware_version_v2(info)
        except Exception as e:
            # A failed lookup is treated the same as "no target assigned".
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', e)

        if stable_firmware is None:
            # This DUT doesn't have a firmware update target
            return

        # For tests 3 and 4:  If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        if current_firmware == stable_firmware:
            return
        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))
        # Time to update the firmware.
        logging.info('Updating firmware from %s to %s',
                     current_firmware, stable_firmware)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            # `logging.exception` records the traceback of the underlying
            # failure before it is reported as a verify error.
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, current_firmware, stable_firmware)
            raise hosts.AutoservVerifyError(
                    message % (current_firmware, stable_firmware))
        # Confirm that the DUT actually came back on the target version.
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'