blob: 0b8af70b4a77b123d0be0dd726f1b44bf25ec5ae [file] [log] [blame]
Richard Barnette90ad4262016-11-17 17:29:24 -08001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Richard Barnette1bf22a32016-11-18 16:14:31 -08005"""
6Repair actions and verifiers relating to CrOS firmware.
7
This contains the repair actions and verifiers needed to find problems
9with the firmware installed on Chrome OS DUTs, and when necessary, to
10fix problems by updating or re-installing the firmware.
Richard Barnette077665e2016-11-29 16:00:59 -080011
12The operations in the module support two distinct use cases:
13 * DUTs used for FAFT tests can in some cases have problems with
14 corrupted firmware. The module supplies `FirmwareStatusVerifier`
Garry Wangad2a1712020-03-26 15:06:43 -070015 to check for corruption, and supplies `FaftFirmwareRepair` to
16 re-install firmware of current faft stable_version via servo
17 when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080018 * DUTs used for general testing normally should be running a
19 designated "stable" firmware version. This module supplies
20 `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version. This module
    also supplies `GeneralFirmwareRepair` to re-install the firmware that
    is tied to the current stable_version image via servo when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080024
25For purposes of the operations in the module, we distinguish three kinds
26of DUT, based on pool assignments:
27 * DUTs used for general testing. These DUTs automatically check for
28 and install the stable firmware using `FirmwareVersionVerifier`.
29 * DUTs in pools used for FAFT testing. These check for bad firmware
30 builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`. These DUTs don't check for or install the
32 stable firmware.
33 * DUTs not in general pools, and not used for FAFT. These DUTs
34 are expected to be managed by separate processes and are excluded
35 from all of the verification and repair code in this module.
Richard Barnette1bf22a32016-11-18 16:14:31 -080036"""
37
Xixuan Wu93e646c2017-12-07 18:36:10 -080038# pylint: disable=missing-docstring
39
Hung-Te Lina014dbc2019-11-07 16:41:42 +080040import json
Richard Barnette90ad4262016-11-17 17:29:24 -080041import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080042
43import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080044from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080045from autotest_lib.client.common_lib import hosts
46from autotest_lib.server import afe_utils
Richard Barnette3245ae22018-08-31 11:50:08 -070047from autotest_lib.server.hosts import repair_utils
Richard Barnette1bf22a32016-11-18 16:14:31 -080048
49
Richard Barnette077665e2016-11-29 16:00:59 -080050# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
52#
# The configuration value is a comma-separated list of pool names.
# Surrounding whitespace is stripped and empty entries are dropped, so
# that a setting such as 'pool-a, pool-b' (or an empty setting) does not
# produce names like ' pool-b' or '' that can never match a real pool.
_FIRMWARE_REPAIR_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip())
58
59
def _is_firmware_testing_device(host):
    """
    Tell whether a host is dedicated to firmware testing.

    A host counts as a firmware testing device when at least one of the
    pools recorded in its host info is also in `_FIRMWARE_REPAIR_POOLS`.
    Such a DUT should be managed by `FirmwareStatusVerifier` and
    `FaftFirmwareRepair`, but not by `FirmwareVersionVerifier` and
    `GeneralFirmwareRepair`.

    @param host  The host to be checked.
    @return True if the host should use `FirmwareStatusVerifier` and
            `FaftFirmwareRepair`; False otherwise.
    """
    host_pools = host.host_info_store.get().pools
    shared_pools = host_pools & _FIRMWARE_REPAIR_POOLS
    return True if shared_pools else False
Richard Barnette077665e2016-11-29 16:00:59 -080073
74
def _is_firmware_update_supported(host):
    """
    Tell whether a DUT should be running the standard firmware.

    DUTs used for general testing have their firmware kept up-to-date
    by `FirmwareVersionVerifier`.  DUTs that are dedicated to firmware
    testing follow a different policy and are excluded from the
    standard stable version update.

    @param host  The host to be checked for update policy.
    @return True if the host should use `FirmwareVersionVerifier`;
            False otherwise.
    """
    if _is_firmware_testing_device(host):
        # Firmware testing devices are managed by a separate procedure.
        return False
    return True
Richard Barnette1bf22a32016-11-18 16:14:31 -080091
92
Ningning Xia05af7402018-02-13 18:19:10 -080093def _get_available_firmware(host, model):
Hung-Te Lina014dbc2019-11-07 16:41:42 +080094 """Get the available RW firmware version given the model.
Ningning Xia05af7402018-02-13 18:19:10 -080095
96 @param host The host to get available firmware for.
97 @param model The model name to get corresponding firmware version.
Hung-Te Lina014dbc2019-11-07 16:41:42 +080098 @return The available RW firmware version if found, else, None.
Ningning Xia05af7402018-02-13 18:19:10 -080099 """
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800100 result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
Ningning Xia05af7402018-02-13 18:19:10 -0800101
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800102 if result.exit_status != 0:
103 return None
Ningning Xia05af7402018-02-13 18:19:10 -0800104
Hung-Te Lina014dbc2019-11-07 16:41:42 +0800105 # The manifest is a JSON in .model.host.versions.rw
106 data = json.loads(result.stdout) or {}
107 key = model if len(data) > 1 else next(data.iterkeys(), '')
108 key += '.host.versions.rw'
109 for k in key.split('.'):
110 data = data.get(k, {})
111 return data or None
Ningning Xia05af7402018-02-13 18:19:10 -0800112
113
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    DUTs that run firmware tests can end up with corrupted firmware.
    This verifier reads the firmware sections back from flash and
    checks their signatures, to decide whether the firmware should be
    re-flashed using servo.
    """

    def verify(self, host):
        """
        Check firmware copies A and B on a firmware testing device.

        Hosts that are not firmware testing devices are skipped.

        @param host  The host to be verified.
        @raises hosts.AutoservVerifyError if verification of firmware
                A or B exits with a non-zero status.
        """
        if _is_firmware_testing_device(host):
            try:
                # Dump the AP firmware sections we're interested in
                # into a scratch directory on the DUT.
                dump_cmd = ('mkdir /tmp/verify_firmware; '
                            'cd /tmp/verify_firmware; '
                            'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B; '
                            'do flashrom -p host -r -i $section:$section; '
                            'done')
                host.run(dump_cmd)

                # Check each firmware copy against the dev root key.
                verify_cmd = ('vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c'
                              ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                              ' --fv /tmp/verify_firmware/FW_MAIN_%c')
                for slot in ('A', 'B'):
                    outcome = host.run(verify_cmd % (slot, slot),
                                       ignore_status=True)
                    if outcome.exit_status:
                        raise hosts.AutoservVerifyError(
                                'Firmware %c is in a bad state.' % slot)
            finally:
                # The scratch directory is removed on success and on
                # failure alike.
                host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette90ad4262016-11-17 17:29:24 -0800152
153
class FirmwareRepair(hosts.RepairAction):
    """
    Reinstall the firmware image using servo.

    This is the common base for the firmware repair actions in this
    module.  Subclasses choose which firmware build to install by
    overriding `_get_stable_build()`; `repair()` then installs that
    build via `host.firmware_install()`.
    """

    def _get_stable_build(self, host):
        """
        Return the firmware build that `repair()` should install.

        Subclasses must override this method.

        @param host  The host to be repaired.
        @return The build to install; a false value means that no
                build could be determined.
        """
        raise NotImplementedError('Class %s does not implement '
                                  '_get_stable_build()'
                                  % type(self).__name__)

    def repair(self, host):
        """
        Install the build named by `_get_stable_build()` on the host.

        @param host  The host to be repaired.
        @raises hosts.AutoservRepairError if no build can be found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        build = self._get_stable_build(host)
        if not build:
            # The hostname comes from `host`; nothing in this module
            # gives the repair action a `hostname` attribute of its
            # own.  The message is formatted here because the
            # exception does not interpolate its arguments.
            # NOTE(review): the second argument is presumably the
            # error's tag - confirm against AutoservRepairError.
            raise hosts.AutoservRepairError(
                    'Failed to find stable firmware build for %s.'
                    % host.hostname,
                    'cannot find firmware stable_version')
        host.firmware_install(build)

    @property
    def description(self):
        return 'Re-install the stable firmware via servo'
180
181
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pool.

    The build to install is the faft stable version looked up from the
    host's info; the action only applies to firmware testing devices.
    """

    def _get_stable_build(self, host):
        """Return the stable faft version for the host's info."""
        return afe_utils.get_stable_faft_version_v2(
                host.host_info_store.get())

    def _is_applicable(self, host):
        """Return True only for hosts dedicated to firmware testing."""
        if not _is_firmware_testing_device(host):
            logging.info('Faft firmware repair is not applicable'
                         ' to host %s.', host.hostname)
            return False
        return True
197
198
class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    A separate RepairAction is used for DUTs that are not firmware
    testing devices, because for those we only want to try
    re-installing firmware once every other RepairAction has failed to
    restore ssh capability to the DUT.
    """

    def _get_stable_build(self, host):
        """Return the host's current repair image name."""
        # The firmware comes from the current stable OS build.
        return host.get_cros_repair_image_name()

    def _is_applicable(self, host):
        """Return True only for hosts not dedicated to firmware testing."""
        if _is_firmware_testing_device(host):
            logging.info('General firmware repair is not applicable'
                         ' to host %s.', host.hostname)
            return False
        return True
216
217
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    The verifier passes when the firmware on the DUT is already
    up-to-date, or when the target firmware can be installed from the
    currently running build.

    The verifier fails when all of the following hold:
      1. The DUT is not excluded from updates.  For example, DUTs used
         for FAFT testing use `FaftFirmwareRepair` instead.
      2. The DUT has an assigned stable firmware version.
      3. The DUT is not running the assigned stable firmware.
      4. The firmware supplied in the running OS build is not the
         assigned stable firmware.

    When the DUT needs an upgrade and the running OS build supplies the
    necessary firmware, the verifier installs it with
    `chromeos-firmwareupdate` and reboots.  A failed install makes the
    verifier fail.

    This nominally breaks the rule that "verifiers must succeed
    quickly", since `reboot()` can be invoked on the success path.
    We accept that for two reasons:
      * Updates are typically months apart, so the amortized cost is
        low.
      * Repair is kept separate from verify so that work can be
        rescheduled while an expensive repair happens out-of-band.  A
        firmware update will likely hit all DUTs at once, so passing
        the buck to repair gains nothing.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the output of `crossystem fwid` on the host.

        @param host  The host to query.
        @return The command's stdout, or None if the command exits
                with a non-zero status.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status != 0:
            return None
        return result.stdout

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        Everything before the first '.' names the hardware the firmware
        was built for; that part must be the same in both versions.

        This guards against installing the wrong firmware on a DUT
        when a board label has somehow gone astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.
        @raises hosts.AutoservVerifyError if the hardware parts differ.
        """
        hardware_a = version_a.partition('.')[0]
        hardware_b = version_b.partition('.')[0]
        if hardware_a == hardware_b:
            return
        message = 'Hardware/Firmware mismatch updating %s to %s'
        raise hosts.AutoservVerifyError(message % (version_a, version_b))

    def verify(self, host):
        """
        Verify the DUT's firmware version, updating it when possible.

        @param host  The host to be verified.
        @raises hosts.AutoservVerifyError if the host has no model, if
                the needed firmware is not supplied by the running OS
                build, if the hardware names do not match, or if the
                update fails.
        """
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return

        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        try:
            stable_firmware = afe_utils.get_stable_firmware_version_v2(info)
        except Exception as lookup_error:
            # A failed lookup is treated as "no update target".
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', lookup_error)
            stable_firmware = None

        if stable_firmware is None:
            # This DUT doesn't have a firmware update target.
            return

        # For tests 3 and 4: If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail: We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        if current_firmware == stable_firmware:
            return

        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))

        # Time to update the firmware.
        versions = (current_firmware, stable_firmware)
        logging.info('Updating firmware from %s to %s', *versions)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, *versions)
            raise hosts.AutoservVerifyError(message % versions)

        # Confirm that the DUT came back running the target firmware.
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'