blob: 04cca10cf246633aa7c05b86475eb09392fcfeed [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Richard Barnette90ad4262016-11-17 17:29:24 -08002# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
"""
Repair actions and verifiers relating to CrOS firmware.

This contains the repair actions and verifiers needed to find problems
with the firmware installed on Chrome OS DUTs, and when necessary, to
fix problems by updating or re-installing the firmware.
Richard Barnette077665e2016-11-29 16:00:59 -080012
The operations in the module support two distinct use cases:
  * DUTs used for FAFT tests can in some cases have problems with
    corrupted firmware.  The module supplies `FirmwareStatusVerifier`
    to check for corruption, and supplies `FaftFirmwareRepair` to
    re-install firmware of current faft stable_version via servo
    when needed.
  * DUTs used for general testing normally should be running a
    designated "stable" firmware version.  This module supplies
    `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version.  This
    module also supplies `GeneralFirmwareRepair` to re-install the
    firmware that is tied to the current stable_version image via
    servo when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080025
For purposes of the operations in the module, we distinguish three kinds
of DUT, based on pool assignments:
  * DUTs used for general testing.  These DUTs automatically check for
    and install the stable firmware using `FirmwareVersionVerifier`.
  * DUTs in pools used for FAFT testing.  These check for bad firmware
    builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`.  These DUTs don't check for or install the
    stable firmware.
  * DUTs not in general pools, and not used for FAFT.  These DUTs
    are expected to be managed by separate processes and are excluded
    from all of the verification and repair code in this module.
"""
38
Xixuan Wu93e646c2017-12-07 18:36:10 -080039# pylint: disable=missing-docstring
40
Derek Beckettf73baca2020-08-19 15:08:47 -070041from __future__ import absolute_import
42from __future__ import division
43from __future__ import print_function
44
Hung-Te Lina014dbc2019-11-07 16:41:42 +080045import json
Richard Barnette90ad4262016-11-17 17:29:24 -080046import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080047
48import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080049from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080050from autotest_lib.client.common_lib import hosts
51from autotest_lib.server import afe_utils
Richard Barnette3245ae22018-08-31 11:50:08 -070052from autotest_lib.server.hosts import repair_utils
Derek Beckettf73baca2020-08-19 15:08:47 -070053import six
Richard Barnette1bf22a32016-11-18 16:14:31 -080054
55
# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
#
# The value is read from the comma-separated
# `pools_support_firmware_repair` entry in the `CROS` section of the
# global config.  Whitespace around each pool name is dropped and empty
# entries are ignored, so that a value written as "pool_a, pool_b" still
# matches a host whose pool is "pool_b", and an empty value produces an
# empty set rather than a set holding the empty string.
#
_FIRMWARE_REPAIR_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip())
64
65
def _is_firmware_testing_device(host):
    """
    Tell whether a host is dedicated to firmware testing.

    The decision is made by intersecting the pools recorded in the
    host's info store with `_FIRMWARE_REPAIR_POOLS`; any overlap marks
    the host as a firmware testing device.

    When this function returns true, the DUT should be managed by
    `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but not
    `FirmwareVersionVerifier` and `GeneralFirmwareRepair`.

    @param host  The host to be checked.
    @return True if the host should use `FirmwareStatusVerifier`
            and `FaftFirmwareRepair`; False otherwise.
    """
    host_pools = host.host_info_store.get().pools
    shared_pools = host_pools & _FIRMWARE_REPAIR_POOLS
    return True if shared_pools else False
Richard Barnette077665e2016-11-29 16:00:59 -080079
80
def _is_firmware_update_supported(host):
    """
    Return whether a DUT should be running the standard firmware.

    In the test lab, DUTs used for general testing, (e.g. the `bvt`
    pool) need their firmware kept up-to-date with
    `FirmwareVersionVerifier`.  However, some pools have alternative
    policies for firmware management.  This returns whether a given DUT
    should be updated via the standard stable version update, or
    managed by some other procedure.

    The answer is simply the negation of
    `_is_firmware_testing_device()`.

    @param host  The host to be checked for update policy.
    @return True if the host should use `FirmwareVersionVerifier`;
            False otherwise.
    """
    if _is_firmware_testing_device(host):
        # Firmware testing devices are managed by a different procedure.
        return False
    return True
Richard Barnette1bf22a32016-11-18 16:14:31 -080097
98
def _get_available_firmware(host, model):
    """Get the available RW firmware version given the model.

    Runs `chromeos-firmwareupdate --manifest` on the host and looks up
    `<key>.host.versions.rw` in the JSON that the command prints.  When
    the manifest has more than one top-level entry, `<key>` is `model`;
    when it has exactly one entry, that entry is used whatever its name.

    Output that cannot be used (a failing command, text that is not
    JSON, or JSON without the expected nesting) is reported as "not
    found" instead of raising, because the caller treats an
    undeterminable version as a logged, non-fatal condition.

    @param host The host to get available firmware for.
    @param model The model name to get corresponding firmware version.
    @return The available RW firmware version if found, else, None.
    """
    result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
    if result.exit_status != 0:
        return None

    try:
        manifest = json.loads(result.stdout)
    except (TypeError, ValueError):
        # Not JSON at all (or no output to parse).
        logging.warning('Unable to parse firmware manifest as JSON.')
        return None
    if not isinstance(manifest, dict) or not manifest:
        return None

    # The manifest is a JSON in .model.host.versions.rw
    key = model if len(manifest) > 1 else next(iter(manifest))
    # Walk the path one component at a time rather than splitting a
    # dotted string, so a '.' inside the model name cannot be mistaken
    # for a path separator.
    node = manifest
    for name in (key, 'host', 'versions', 'rw'):
        if not isinstance(node, dict):
            return None
        node = node.get(name)
    return node or None
Ningning Xia05af7402018-02-13 18:19:10 -0800118
119
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    For DUTs that run firmware tests, it's possible that the firmware
    on the DUT can get corrupted.  This verifier checks whether it
    appears that firmware should be re-flashed using servo.

    Hosts that are not firmware testing devices pass without any
    check being run.
    """

    def verify(self, host):
        """
        Dump the A and B firmware sections and check their signatures.

        @param host  The host whose firmware is to be checked.
        @raises hosts.AutoservVerifyError if `vbutil_firmware` reports
                a non-zero status for either firmware slot.
        """
        if not _is_firmware_testing_device(host):
            return

        # Read the AP firmware and dump the sections that we're
        # interested in.
        dump_cmd = ('mkdir /tmp/verify_firmware; '
                    'cd /tmp/verify_firmware; '
                    'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B; '
                    'do flashrom -p host -r -i $section:$section; '
                    'done')
        # Checks one slot; both placeholders take the slot letter.
        verify_cmd = (
                'vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c'
                ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                ' --fv /tmp/verify_firmware/FW_MAIN_%c')
        try:
            host.run(dump_cmd)
            # Verify the firmware blocks A and B.
            for slot in ('A', 'B'):
                status = host.run(verify_cmd % (slot, slot),
                                  ignore_status=True).exit_status
                if status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % slot)
        finally:
            # Remove the temporary files, whether or not the check passed.
            host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette90ad4262016-11-17 17:29:24 -0800158
159
class FirmwareRepair(hosts.RepairAction):
    """
    Reinstall the firmware image using servo.

    This is the common base for the firmware repair actions in this
    module.  `repair()` requires a servo, asks `_get_stable_build()`
    which build to install, and hands that build to `_run_repair()`.
    Both hooks raise `NotImplementedError` here and are meant to be
    overridden by subclasses.
    """
    def _get_stable_build(self, host):
        """Return the build to install on `host`; must be overridden."""
        template = 'Class %s does not implement _get_stable_build()'
        raise NotImplementedError(template % type(self).__name__)

    def _run_repair(self, host, build):
        """Install firmware from `build` on `host`; must be overridden."""
        template = 'Class %s does not implement _run_repair()'
        raise NotImplementedError(template % type(self).__name__)

    def repair(self, host):
        """
        Reinstall firmware on `host` from its stable build.

        @param host  The host to be repaired.
        @raises hosts.AutoservRepairError if no stable build is found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        stable_build = self._get_stable_build(host)
        if stable_build:
            self._run_repair(host, stable_build)
            return
        reason = ('Failed to find stable firmware build for %s, if the DUT is'
                  ' in faft-*pool, faft stable_version needs to be set.'
                  % host.hostname)
        raise hosts.AutoservRepairError(
                reason, 'cannot find firmware stable_version')
Richard Barnette077665e2016-11-29 16:00:59 -0800188
Richard Barnette077665e2016-11-29 16:00:59 -0800189
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pool.

    Only applicable to hosts that `_is_firmware_testing_device()`
    accepts.
    """
    def _get_stable_build(self, host):
        # The faft stable version is looked up from the host's info.
        host_info = host.host_info_store.get()
        faft_build = afe_utils.get_stable_faft_version_v2(host_info)
        return faft_build

    def _run_repair(self, host, build):
        # Installation is delegated entirely to the host object.
        host.firmware_install(build)

    def _is_applicable(self, host):
        is_faft_device = _is_firmware_testing_device(host)
        return is_faft_device

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700207
208
class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    We need a different RepairAction for non firmware testing DUTs
    because we only want to try re-installing firmware if all other
    RepairActions could not restore ssh capability to the DUT.
    """
    def _get_stable_build(self, host):
        # Use firmware in current stable os build.
        return host.get_cros_repair_image_name()

    def _run_repair(self, host, build):
        """
        Flash EC and BIOS firmware from the image on the servo usbkey.

        @param host   The host to be repaired.
        @param build  The image name expected to be on the usbkey.
        @raises hosts.AutoservRepairError if the usbkey does not report
                `build` as its image.
        """
        # As GeneralFirmwareRepair is the last repair action, we expect
        # stable_version os image is loaded on usbkey during other repair
        # action runs. And there is also no point to repeat and waste time if
        # download image to usbkey failed in other repair actions.
        if host._servo_host.validate_image_usbkey() != build:
            # The two literals are joined by the parser before `%` is
            # applied, so the separating space has to be spelled out.
            raise hosts.AutoservRepairError(
                    '%s is expected to be preloaded, '
                    'however it\'s not found on the usbkey' % build,
                    'image not loaded on usbkey')
        ec_image, bios_image = host._servo_host.prepare_repair_firmware_image()
        # Either image may be absent; only flash what was prepared.
        if ec_image:
            logging.info('Attempting to flash ec firmware...')
            host.servo.program_ec(ec_image, copy_image=False)
        if bios_image:
            logging.info('Attempting to flash bios firmware...')
            host.servo.program_bios(bios_image, copy_image=False)

        logging.info('Cold resetting DUT through servo...')
        host.servo.get_power_state_controller().reset()
        host.wait_up(timeout=host.BOOT_TIMEOUT)

    def _is_applicable(self, host):
        return not _is_firmware_testing_device(host)

    @property
    def description(self):
        return 'Re-install the stable firmware(non-faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700246
247
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    This verifier checks to ensure that either the firmware on the DUT
    is up-to-date, or that the target firmware can be installed from the
    currently running build.

    Failure occurs when all of the following apply:
      1. The DUT is not excluded from updates.  For example, DUTs used
         for FAFT testing use `FaftFirmwareRepair` instead.
      2. The DUT's board has an assigned stable firmware version.
      3. The DUT is not running the assigned stable firmware.
      4. The firmware supplied in the running OS build is not the
         assigned stable firmware.

    If the DUT needs an upgrade and the currently running OS build
    supplies the necessary firmware, the verifier installs the new
    firmware using `chromeos-firmwareupdate`.  Failure to install will
    cause the verifier to fail.

    This verifier nominally breaks the rule that "verifiers must succeed
    quickly", since it can invoke `reboot()` during the success code
    path.  We're doing it anyway for two reasons:
      * The time between updates will typically be measured in months,
        so the amortized cost is low.
      * The reason we distinguish repair from verify is to allow
        rescheduling work immediately while the expensive repair happens
        out-of-band.  But a firmware update will likely hit all DUTs at
        once, so it's pointless to pass the buck to repair.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the firmware version reported by `crossystem fwid`.

        Surrounding whitespace is removed so that the value can be
        compared directly against a stable version string.

        @param host  The host to be queried.
        @return The reported version, or None if the command failed or
                printed nothing.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status != 0:
            return None
        # An empty answer is no more usable than a failed command.
        return result.stdout.strip() or None

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        The part before the numbers identifies the hardware for which
        the firmware was built.  This function checks that the hardware
        identified by `version_a` and `version_b` is the same.

        This is a sanity check to protect us from installing the wrong
        firmware on a DUT when a board label has somehow gone astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.
        @raises hosts.AutoservVerifyError if the hardware parts differ.
        """
        hardware_a = version_a.split('.')[0]
        hardware_b = version_b.split('.')[0]
        if hardware_a != hardware_b:
            message = 'Hardware/Firmware mismatch updating %s to %s'
            raise hosts.AutoservVerifyError(
                    message % (version_a, version_b))

    def verify(self, host):
        """
        Run the four tests described in the class docstring.

        @param host  The host to be verified, and updated if needed.
        @raises hosts.AutoservVerifyError if the host has no model, if
                the required firmware is not supplied by the running
                build, or if the update does not take effect.
        """
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return
        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        # A failed lookup is deliberately treated the same as "no
        # stable version assigned": it is logged and the DUT passes.
        stable_firmware = None
        try:
            stable_firmware = afe_utils.get_stable_firmware_version_v2(info)
        except Exception as e:
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', e)

        if stable_firmware is None:
            # This DUT doesn't have a firmware update target
            return

        # For tests 3 and 4:  If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        if current_firmware == stable_firmware:
            return
        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))
        # Time to update the firmware.
        logging.info('Updating firmware from %s to %s',
                     current_firmware, stable_firmware)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, current_firmware, stable_firmware)
            raise hosts.AutoservVerifyError(
                    message % (current_firmware, stable_firmware))
        # Confirm that the update actually took effect after the reboot.
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'