blob: 4cd5c02b6311726a7412a50187a324e9bfe05dfc [file] [log] [blame]
Richard Barnette90ad4262016-11-17 17:29:24 -08001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
"""
Repair actions and verifiers relating to CrOS firmware.

This contains the repair actions and verifiers needed to find problems
with the firmware installed on Chrome OS DUTs, and when necessary, to
fix problems by updating or re-installing the firmware.

The operations in the module support two distinct use cases:
  * DUTs used for FAFT tests can in some cases have problems with
    corrupted firmware.  The module supplies `FirmwareStatusVerifier`
    to check for corruption, and supplies `FaftFirmwareRepair` to
    re-install firmware of current faft stable_version via servo
    when needed.
  * DUTs used for general testing normally should be running a
    designated "stable" firmware version.  This module supplies
    `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version.  This
    module also supplies `GeneralFirmwareRepair` to re-install the
    firmware that is tied to the current stable_version image via
    servo when needed.

For purposes of the operations in the module, we distinguish three kinds
of DUT, based on pool assignments:
  * DUTs used for general testing.  These DUTs automatically check for
    and install the stable firmware using `FirmwareVersionVerifier`.
  * DUTs in pools used for FAFT testing.  These check for bad firmware
    builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`.  These DUTs don't check for or install the
    stable firmware.
  * DUTs not in general pools, and not used for FAFT.  These DUTs
    are expected to be managed by separate processes and are excluded
    from all of the verification and repair code in this module.
"""
37
Xixuan Wu93e646c2017-12-07 18:36:10 -080038# pylint: disable=missing-docstring
39
Hung-Te Lina014dbc2019-11-07 16:41:42 +080040import json
Richard Barnette90ad4262016-11-17 17:29:24 -080041import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080042
43import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080044from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080045from autotest_lib.client.common_lib import hosts
46from autotest_lib.server import afe_utils
Richard Barnette3245ae22018-08-31 11:50:08 -070047from autotest_lib.server.hosts import repair_utils
Richard Barnette1bf22a32016-11-18 16:14:31 -080048
49
# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
#
# The value is read from the comma-separated
# `pools_support_firmware_repair` setting in the `CROS` section of the
# global config.  Whitespace around each name is stripped and empty
# entries are dropped, so a value written as "a, b" (or with a trailing
# comma) still produces pool names that can match a DUT's pools.
#
_FIRMWARE_REPAIR_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip())
58
59
def _is_firmware_testing_device(host):
    """
    Check whether a host is dedicated to firmware testing.

    A host counts as a firmware testing device when at least one of the
    pools recorded in its host info is listed in
    `_FIRMWARE_REPAIR_POOLS`.  Such a DUT should be managed by
    `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but not by
    `FirmwareVersionVerifier` and `GeneralFirmwareRepair`.

    @param host  The host to be checked.
    @return `True` if the host should use `FirmwareStatusVerifier`
            and `FaftFirmwareRepair`; `False` otherwise.
    """
    host_pools = host.host_info_store.get().pools
    shared_pools = host_pools & _FIRMWARE_REPAIR_POOLS
    return True if shared_pools else False
Richard Barnette077665e2016-11-29 16:00:59 -080073
74
def _is_firmware_update_supported(host):
    """
    Return whether a DUT should be running the standard firmware.

    DUTs used for general testing in the test lab (e.g. the `bvt` pool)
    have their firmware kept up-to-date by `FirmwareVersionVerifier`.
    Some pools follow a different firmware management policy.  This
    function tells the two apart: a DUT is updated via the standard
    stable version update exactly when it is not a firmware testing
    device.

    @param host  The host to be checked for update policy.
    @return `True` if the host should use `FirmwareVersionVerifier`;
            `False` otherwise.
    """
    if _is_firmware_testing_device(host):
        return False
    return True
Richard Barnette1bf22a32016-11-18 16:14:31 -080091
92
def _get_available_firmware(host, model):
    """Get the available RW firmware version given the model.

    Runs `chromeos-firmwareupdate --manifest` on the host and looks up
    `<key>.host.versions.rw` in the JSON it prints.  `<key>` is `model`
    when the manifest has more than one top-level entry; when it has
    exactly one entry, that entry is used regardless of its name.

    Any output that can't be interpreted (command failure, malformed
    JSON, unexpected structure, missing entry) is reported as `None`
    rather than raised, because the caller treats an undeterminable
    version as a non-fatal condition.

    @param host   The host to get available firmware for.
    @param model  The model name to get corresponding firmware version.
    @return The available RW firmware version if found, else, None.
    """
    result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
    if result.exit_status != 0:
        return None

    try:
        manifest = json.loads(result.stdout)
    except ValueError:
        logging.error('Output of `chromeos-firmwareupdate --manifest` '
                      'is not valid JSON.')
        return None
    if not manifest or not isinstance(manifest, dict):
        return None

    # The manifest is a JSON in .model.host.versions.rw
    # `next(iter(...))` works on both Python 2 and Python 3 dicts.
    key = model if len(manifest) > 1 else next(iter(manifest))

    # Walk the path one element at a time instead of joining and
    # re-splitting on '.', so a key containing '.' is looked up intact
    # and a non-dict intermediate value ends the search cleanly.
    node = manifest
    for name in (key, 'host', 'versions', 'rw'):
        if not isinstance(node, dict):
            return None
        node = node.get(name)
    return node or None
Ningning Xia05af7402018-02-13 18:19:10 -0800112
113
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    DUTs that run firmware tests can end up with corrupted firmware.
    This verifier reads the AP firmware sections back from the DUT and
    checks both the A and B copies with `vbutil_firmware`; a failed
    check means the firmware should be re-flashed using servo.
    """

    def verify(self, host):
        # Only DUTs dedicated to firmware testing are checked.
        if not _is_firmware_testing_device(host):
            return
        try:
            # Dump the vblock and main firmware sections for both
            # slots into a scratch directory on the DUT.
            dump_cmd = '; '.join([
                    'mkdir /tmp/verify_firmware',
                    'cd /tmp/verify_firmware',
                    'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B',
                    'do flashrom -p host -r -i $section:$section',
                    'done',
            ])
            host.run(dump_cmd)

            # Check each slot's vblock against its firmware body,
            # using the developer root key shipped on the DUT.
            verify_template = (
                    'vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c'
                    ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                    ' --fv /tmp/verify_firmware/FW_MAIN_%c')
            for slot in 'AB':
                outcome = host.run(verify_template % (slot, slot),
                                   ignore_status=True)
                if outcome.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % slot)
        finally:
            # The scratch directory is removed whether or not the
            # check passed.
            host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette90ad4262016-11-17 17:29:24 -0800152
153
class FirmwareRepair(hosts.RepairAction):
    """
    Base class for repair actions that reinstall firmware using servo.

    `repair()` requires a servo, asks the subclass which build to
    install, and then hands that build to the subclass to perform the
    installation.  Subclasses must supply `_get_stable_build()` and
    `_run_repair()`.
    """
    def _get_stable_build(self, host):
        # Subclass hook: return the build to install, or a false value
        # if none can be found.
        message = 'Class %s does not implement _get_stable_build()'
        raise NotImplementedError(message % type(self).__name__)

    def _run_repair(self, host, build):
        # Subclass hook: install `build` on `host`.
        message = 'Class %s does not implement _run_repair()'
        raise NotImplementedError(message % type(self).__name__)

    def repair(self, host):
        repair_utils.require_servo(host, ignore_state=True)
        build = self._get_stable_build(host)
        if build:
            self._run_repair(host, build)
            return
        raise hosts.AutoservRepairError(
                'Failed to find stable firmware build for %s, if the DUT is'
                ' in faft-*pool, faft stable_version needs to be set.'
                % host.hostname, 'cannot find firmware stable_version')
Richard Barnette077665e2016-11-29 16:00:59 -0800182
Richard Barnette077665e2016-11-29 16:00:59 -0800183
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pool.

    The build to install is the faft stable version looked up from the
    DUT's host info; this action only applies to firmware testing
    devices.
    """
    def _get_stable_build(self, host):
        return afe_utils.get_stable_faft_version_v2(
                host.host_info_store.get())

    def _run_repair(self, host, build):
        host.firmware_install(build)

    def _is_applicable(self, host):
        applicable = _is_firmware_testing_device(host)
        return applicable

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700201
202
class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    We need a different RepairAction for non firmware testing DUTs
    because we only want to try re-installing firmware if all other
    RepairActions could not restore ssh capability to the DUT.
    """
    def _get_stable_build(self, host):
        # Use firmware in current stable os build.
        return host.get_cros_repair_image_name()

    def _run_repair(self, host, build):
        """Flash the EC and BIOS images from the preloaded usbkey image.

        @param host   The host whose firmware is to be re-installed.
        @param build  The stable OS build expected on the servo usbkey.

        @raises AutoservRepairError if the image validated on the
                usbkey is not `build`.
        """
        # As GeneralFirmwareRepair is the last repair action, we expect
        # stable_version os image is loaded on usbkey during other repair
        # action runs. And there is also no point to repeat and waste time if
        # download image to usbkey failed in other repair actions.
        if host._servo_host.validate_image_usbkey() != build:
            raise hosts.AutoservRepairError(
                    '%s is expected to be preloaded, '
                    'however it\'s not found on the usbkey' % build,
                    'image not loaded on usbkey')
        # Either image may be a false value, in which case that
        # component is simply not flashed.
        ec_image, bios_image = host._servo_host.prepare_repair_firmware_image()
        if ec_image:
            logging.info('Attempting to flash ec firmware...')
            host.servo.program_ec(ec_image, copy_image=False)
        if bios_image:
            logging.info('Attempting to flash bios firmware...')
            host.servo.program_bios(bios_image, copy_image=False)

        logging.info('Cold resetting DUT through servo...')
        host.servo.get_power_state_controller().reset()
        host.wait_up(timeout=host.BOOT_TIMEOUT)

    def _is_applicable(self, host):
        return not _is_firmware_testing_device(host)

    @property
    def description(self):
        return 'Re-install the stable firmware(non-faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700240
241
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    This verifier checks to ensure that either the firmware on the DUT
    is up-to-date, or that the target firmware can be installed from the
    currently running build.

    Failure occurs when all of the following apply:
      1. The DUT is not excluded from updates.  For example, DUTs used
         for FAFT testing are handled by `FirmwareStatusVerifier` and
         `FaftFirmwareRepair` instead.
      2. The DUT's board has an assigned stable firmware version.
      3. The DUT is not running the assigned stable firmware.
      4. The firmware supplied in the running OS build is not the
         assigned stable firmware.

    If the DUT needs an upgrade and the currently running OS build
    supplies the necessary firmware, the verifier installs the new
    firmware using `chromeos-firmwareupdate`.  Failure to install will
    cause the verifier to fail.

    This verifier nominally breaks the rule that "verifiers must succeed
    quickly", since it can invoke `reboot()` during the success code
    path.  We're doing it anyway for two reasons:
      * The time between updates will typically be measured in months,
        so the amortized cost is low.
      * The reason we distinguish repair from verify is to allow
        rescheduling work immediately while the expensive repair happens
        out-of-band.  But a firmware update will likely hit all DUTs at
        once, so it's pointless to pass the buck to repair.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the firmware id reported by `crossystem fwid`.

        Surrounding whitespace is stripped so that the result can be
        compared for equality against a version string from another
        source.

        @param host  The host to query.
        @return The firmware id, or `None` if the command failed.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status != 0:
            return None
        return result.stdout.strip()

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        The part before the numbers identifies the hardware for which
        the firmware was built.  This function checks that the hardware
        identified by `version_a` and `version_b` is the same.

        This is a safety check to protect us from installing the wrong
        firmware on a DUT when a board label has somehow gone astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.
        @raises AutoservVerifyError if the hardware parts differ.
        """
        hardware_a = version_a.split('.')[0]
        hardware_b = version_b.split('.')[0]
        if hardware_a != hardware_b:
            message = 'Hardware/Firmware mismatch updating %s to %s'
            raise hosts.AutoservVerifyError(
                    message % (version_a, version_b))

    def verify(self, host):
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return
        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        # A failed lookup is logged and then treated the same as "no
        # stable firmware assigned".
        stable_firmware = None
        try:
            stable_firmware = afe_utils.get_stable_firmware_version_v2(info)
        except Exception as e:
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', e)

        if stable_firmware is None:
            # This DUT doesn't have a firmware update target
            return

        # For tests 3 and 4:  If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        if current_firmware == stable_firmware:
            return
        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))
        # Time to update the firmware.
        logging.info('Updating firmware from %s to %s',
                     current_firmware, stable_firmware)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, current_firmware, stable_firmware)
            raise hosts.AutoservVerifyError(
                    message % (current_firmware, stable_firmware))
        # Confirm the update took effect after the reboot.
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'