blob: 89bbe43a2e4643ddf7a1f6ac5e1651f1511d40dd [file] [log] [blame]
Richard Barnette90ad4262016-11-17 17:29:24 -08001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Richard Barnette1bf22a32016-11-18 16:14:31 -08005"""
6Repair actions and verifiers relating to CrOS firmware.
7
This contains the repair actions and verifiers needed to find problems
9with the firmware installed on Chrome OS DUTs, and when necessary, to
10fix problems by updating or re-installing the firmware.
Richard Barnette077665e2016-11-29 16:00:59 -080011
12The operations in the module support two distinct use cases:
13 * DUTs used for FAFT tests can in some cases have problems with
14 corrupted firmware. The module supplies `FirmwareStatusVerifier`
Garry Wangad2a1712020-03-26 15:06:43 -070015 to check for corruption, and supplies `FaftFirmwareRepair` to
16 re-install firmware of current faft stable_version via servo
17 when needed.
  * DUTs used for general testing normally should be running a
    designated "stable" firmware version.  This module supplies
    `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version.  This
    module also supplies `GeneralFirmwareRepair` to re-install the
    firmware tied to the current stable_version image via servo when
    needed.
Richard Barnette077665e2016-11-29 16:00:59 -080024
25For purposes of the operations in the module, we distinguish three kinds
26of DUT, based on pool assignments:
27 * DUTs used for general testing. These DUTs automatically check for
28 and install the stable firmware using `FirmwareVersionVerifier`.
  * DUTs in pools used for FAFT testing.  These check for bad firmware
    builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`.  These DUTs don't check for or install the
    stable firmware.
33 * DUTs not in general pools, and not used for FAFT. These DUTs
34 are expected to be managed by separate processes and are excluded
35 from all of the verification and repair code in this module.
Richard Barnette1bf22a32016-11-18 16:14:31 -080036"""
37
Xixuan Wu93e646c2017-12-07 18:36:10 -080038# pylint: disable=missing-docstring
39
Hung-Te Lina014dbc2019-11-07 16:41:42 +080040import json
Richard Barnette90ad4262016-11-17 17:29:24 -080041import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080042
43import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080044from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080045from autotest_lib.client.common_lib import hosts
46from autotest_lib.server import afe_utils
Richard Barnette3245ae22018-08-31 11:50:08 -070047from autotest_lib.server.hosts import repair_utils
Richard Barnette1bf22a32016-11-18 16:14:31 -080048
49
# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
#
# The config value is a comma-separated list.  Whitespace around each
# name is stripped and empty entries are dropped, so that a value such
# as "pool_a, pool_b" or a trailing comma cannot produce an entry that
# never matches a real pool name.
#
_FIRMWARE_REPAIR_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip())
58
59
def _is_firmware_testing_device(host):
    """
    Check whether a host is dedicated to firmware testing.

    A host counts as a firmware testing device when at least one of its
    pools is listed in `_FIRMWARE_REPAIR_POOLS`.  Such a DUT should be
    managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but
    not by `FirmwareVersionVerifier` and `GeneralFirmwareRepair`.

    @param host     The host to be checked.
    @return True if the host should use `FirmwareStatusVerifier` and
            `FaftFirmwareRepair`; False otherwise.
    """
    host_pools = host.host_info_store.get().pools
    shared_pools = host_pools & _FIRMWARE_REPAIR_POOLS
    return bool(shared_pools)
Richard Barnette077665e2016-11-29 16:00:59 -080073
74
def _is_firmware_update_supported(host):
    """
    Tell whether a DUT is expected to run the standard firmware.

    DUTs used for general testing in the test lab (e.g. the `bvt` pool)
    have their firmware kept up-to-date by `FirmwareVersionVerifier`.
    Some pools follow a different firmware management policy; for those
    the standard stable version update must not be applied.

    @param host     The host to be checked for update policy.
    @return True if the host should use `FirmwareVersionVerifier`;
            False if its firmware is managed by some other procedure.
    """
    # Only firmware testing devices are excluded from the standard
    # update.
    is_faft_device = _is_firmware_testing_device(host)
    return not is_faft_device
Richard Barnette1bf22a32016-11-18 16:14:31 -080091
92
Ningning Xia05af7402018-02-13 18:19:10 -080093def _get_available_firmware(host, model):
Hung-Te Lina014dbc2019-11-07 16:41:42 +080094 """Get the available RW firmware version given the model.
Ningning Xia05af7402018-02-13 18:19:10 -080095
96 @param host The host to get available firmware for.
97 @param model The model name to get corresponding firmware version.
Hung-Te Lina014dbc2019-11-07 16:41:42 +080098 @return The available RW firmware version if found, else, None.
Ningning Xia05af7402018-02-13 18:19:10 -080099 """
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800100 result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
Ningning Xia05af7402018-02-13 18:19:10 -0800101
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800102 if result.exit_status != 0:
103 return None
Ningning Xia05af7402018-02-13 18:19:10 -0800104
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800105 # The manifest is a JSON in .model.host.versions.rw
106 data = json.loads(result.stdout) or {}
107 key = model if len(data) > 1 else next(data.iterkeys(), '')
108 key += '.host.versions.rw'
109 for k in key.split('.'):
110 data = data.get(k, {})
111 return data or None
Ningning Xia05af7402018-02-13 18:19:10 -0800112
113
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    DUTs that run firmware tests can end up with corrupted firmware.
    This verifier decides whether the firmware looks like it needs to
    be re-flashed using servo.  Hosts that are not firmware testing
    devices are skipped.
    """

    def verify(self, host):
        """
        Dump the firmware sections from the DUT and verify slots A and B.

        @param host     The host whose firmware is to be checked.
        @raises hosts.AutoservVerifyError if `vbutil_firmware` reports a
                failure for either firmware slot.
        """
        if not _is_firmware_testing_device(host):
            return

        # Reads the AP firmware and dumps each section of interest into
        # a file named after the section.
        dump_sections = ('mkdir /tmp/verify_firmware; '
                         'cd /tmp/verify_firmware; '
                         'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B; '
                         'do flashrom -p host -r -i $section:$section; '
                         'done')
        # Verifies one firmware slot; both `%c` fields take the slot
        # letter.
        verify_slot = ('vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c'
                       ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                       ' --fv /tmp/verify_firmware/FW_MAIN_%c')
        try:
            host.run(dump_sections)
            for slot in ('A', 'B'):
                outcome = host.run(verify_slot % (slot, slot),
                                   ignore_status=True)
                if outcome.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % slot)
        finally:
            # The dumped sections are temporary; always remove them.
            host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette90ad4262016-11-17 17:29:24 -0800152
153
class FirmwareRepair(hosts.RepairAction):
    """
    Base class for repair actions that reinstall firmware using servo.

    `repair()` requires a servo, asks the subclass for the firmware
    build to install via `_get_stable_build()`, and installs it with
    `host.firmware_install()`.  Subclasses must override
    `_get_stable_build()`.
    """
    def _get_stable_build(self, host):
        """
        Return the firmware build to install on `host`.

        @param host     The host being repaired.
        @raises NotImplementedError always; subclasses must override.
        """
        class_name = type(self).__name__
        raise NotImplementedError('Class %s does not implement '
                                  '_get_stable_build()'
                                  % class_name)

    def repair(self, host):
        """
        Install the build chosen by `_get_stable_build()` via servo.

        @param host     The host to be repaired.
        @raises hosts.AutoservRepairError if no build could be found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        stable_build = self._get_stable_build(host)
        if stable_build:
            host.firmware_install(stable_build)
            return
        raise hosts.AutoservRepairError(
                'Failed to find stable firmware build for %s, if the DUT is'
                ' in faft-*pool, faft stable_version needs to be set.'
                % host.hostname, 'cannot find firmware stable_version')
Richard Barnette077665e2016-11-29 16:00:59 -0800177
Richard Barnette077665e2016-11-29 16:00:59 -0800178
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pools.

    Applies only to hosts for which `_is_firmware_testing_device()`
    returns true.
    """
    def _get_stable_build(self, host):
        """Return the build `afe_utils` reports as faft stable version."""
        return afe_utils.get_stable_faft_version_v2(
                host.host_info_store.get())

    def _is_applicable(self, host):
        """Return whether `host` is a firmware testing device."""
        is_faft_device = _is_firmware_testing_device(host)
        return is_faft_device

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700193
194
class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    DUTs that are not used for firmware testing get their own
    RepairAction because firmware re-installation should only be tried
    for them once every other RepairAction has failed to restore ssh
    access to the DUT.
    """
    def _get_stable_build(self, host):
        """Return the repair image name reported by `host`."""
        # The firmware comes from the current stable OS build.
        repair_image = host.get_cros_repair_image_name()
        return repair_image

    def _is_applicable(self, host):
        """Return whether `host` is NOT a firmware testing device."""
        is_faft_device = _is_firmware_testing_device(host)
        return not is_faft_device

    @property
    def description(self):
        return 'Re-install the stable firmware(non-faft) via servo'
Garry Wangad2a1712020-03-26 15:06:43 -0700211
212
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    This verifier checks to ensure that either the firmware on the DUT
    is up-to-date, or that the target firmware can be installed from the
    currently running build.

    Failure occurs when all of the following apply:
     1. The DUT is not excluded from updates.  For example, DUTs used
        for FAFT testing use `FaftFirmwareRepair` instead.
     2. The DUT's model has an assigned stable firmware version.
     3. The DUT is not running the assigned stable firmware.
     4. The firmware supplied in the running OS build is not the
        assigned stable firmware.

    If the DUT needs an upgrade and the currently running OS build
    supplies the necessary firmware, the verifier installs the new
    firmware using `chromeos-firmwareupdate`.  Failure to install will
    cause the verifier to fail.

    This verifier nominally breaks the rule that "verifiers must succeed
    quickly", since it can invoke `reboot()` during the success code
    path.  We're doing it anyway for two reasons:
      * The time between updates will typically be measured in months,
        so the amortized cost is low.
      * The reason we distinguish repair from verify is to allow
        rescheduling work immediately while the expensive repair happens
        out-of-band.  But a firmware update will likely hit all DUTs at
        once, so it's pointless to pass the buck to repair.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the firmware ID reported by `crossystem fwid`.

        @param host     The host to query.
        @return The reported firmware ID with surrounding whitespace
                removed, or None if the command failed.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status != 0:
            return None
        # Strip so that stray whitespace in the command output cannot
        # make an up-to-date DUT look out-of-date.
        return result.stdout.strip()

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        The part before the numbers identifies the hardware for which
        the firmware was built.  This function checks that the hardware
        identified by `version_a` and `version_b` is the same.

        This is a sanity check to protect us from installing the wrong
        firmware on a DUT when a board label has somehow gone astray.

        @param version_a First firmware version for the comparison.
        @param version_b Second firmware version for the comparison.
        @raises hosts.AutoservVerifyError if the hardware parts differ.
        """
        hardware_a = version_a.split('.')[0]
        hardware_b = version_b.split('.')[0]
        if hardware_a != hardware_b:
            message = 'Hardware/Firmware mismatch updating %s to %s'
            raise hosts.AutoservVerifyError(
                    message % (version_a, version_b))

    @staticmethod
    def _lookup_stable_firmware(info):
        """
        Look up the stable firmware version assigned to a DUT.

        A failed lookup is logged and treated the same as "no version
        assigned", so that it cannot fail verification by itself.

        @param info     The host info of the DUT.
        @return The stable firmware version, or None if there is none
                or the lookup raised.
        """
        try:
            return afe_utils.get_stable_firmware_version_v2(info)
        except Exception as e:
            logging.exception('Failed lookup to AFE for stable fw version '
                              'with exception: %s', e)
            return None

    def _install_stable_firmware(self, host, current_firmware,
                                 stable_firmware):
        """
        Run the firmware updater, reboot, and confirm the result.

        @param host              The host to be updated.
        @param current_firmware  Firmware version the DUT runs now.
        @param stable_firmware   Firmware version expected afterwards.
        @raises hosts.AutoservVerifyError if the hardware parts of the
                two versions differ, if the update or reboot fails, or
                if the DUT is not running `stable_firmware` afterwards.
        """
        logging.info('Updating firmware from %s to %s',
                     current_firmware, stable_firmware)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, current_firmware, stable_firmware)
            raise hosts.AutoservVerifyError(
                    message % (current_firmware, stable_firmware))
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    def verify(self, host):
        """
        Verify the DUT's firmware version, updating it when possible.

        @param host     The host to be verified.
        @raises hosts.AutoservVerifyError if the host has no model, if
                an update is needed but the running OS build does not
                supply the stable firmware, or if the update fails.
        """
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return
        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        stable_firmware = self._lookup_stable_firmware(info)
        if stable_firmware is None:
            # This DUT doesn't have a firmware update target
            return

        # For tests 3 and 4:  If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        if current_firmware == stable_firmware:
            return
        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))
        # Time to update the firmware.
        self._install_stable_firmware(host, current_firmware,
                                      stable_firmware)

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'