blob: cce0747118603b0230fb36f60b63ac8cba2b3e0c [file] [log] [blame]
Richard Barnette90ad4262016-11-17 17:29:24 -08001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Richard Barnette1bf22a32016-11-18 16:14:31 -08005"""
6Repair actions and verifiers relating to CrOS firmware.
7
This contains the repair actions and verifiers needed to find problems
with the firmware installed on Chrome OS DUTs, and when necessary, to
fix problems by updating or re-installing the firmware.
Richard Barnette077665e2016-11-29 16:00:59 -080011
12The operations in the module support two distinct use cases:
13 * DUTs used for FAFT tests can in some cases have problems with
14 corrupted firmware. The module supplies `FirmwareStatusVerifier`
Garry Wangad2a1712020-03-26 15:06:43 -070015 to check for corruption, and supplies `FaftFirmwareRepair` to
16 re-install firmware of current faft stable_version via servo
17 when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080018 * DUTs used for general testing normally should be running a
19 designated "stable" firmware version. This module supplies
20 `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version. This module
    also supplies `GeneralFirmwareRepair` to re-install the firmware that
    is tied to the current stable_version image via servo when needed.
Richard Barnette077665e2016-11-29 16:00:59 -080024
25For purposes of the operations in the module, we distinguish three kinds
26of DUT, based on pool assignments:
27 * DUTs used for general testing. These DUTs automatically check for
28 and install the stable firmware using `FirmwareVersionVerifier`.
29 * DUTs in pools used for FAFT testing. These check for bad firmware
    builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`. These DUTs don't check for or install the
32 stable firmware.
33 * DUTs not in general pools, and not used for FAFT. These DUTs
34 are expected to be managed by separate processes and are excluded
35 from all of the verification and repair code in this module.
Richard Barnette1bf22a32016-11-18 16:14:31 -080036"""
37
Xixuan Wu93e646c2017-12-07 18:36:10 -080038# pylint: disable=missing-docstring
39
Hung-Te Lina014dbc2019-11-07 16:41:42 +080040import json
Richard Barnette90ad4262016-11-17 17:29:24 -080041import logging
Richard Barnette90ad4262016-11-17 17:29:24 -080042
43import common
Richard Barnette1bf22a32016-11-18 16:14:31 -080044from autotest_lib.client.common_lib import global_config
Richard Barnette90ad4262016-11-17 17:29:24 -080045from autotest_lib.client.common_lib import hosts
46from autotest_lib.server import afe_utils
Richard Barnette3245ae22018-08-31 11:50:08 -070047from autotest_lib.server.hosts import repair_utils
Richard Barnette1bf22a32016-11-18 16:14:31 -080048
49
# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
#
# The value is read from the comma-separated
# `pools_support_firmware_repair` setting in the `CROS` config section.
# Whitespace around each pool name is dropped and empty entries are
# ignored, so a setting such as "a, b," yields the two pools 'a' and 'b'
# rather than names that could never match a real pool.
#
_FIRMWARE_REPAIR_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip())
58
59
def _is_firmware_testing_device(host):
    """
    Tell whether a host is dedicated to firmware testing.

    A host counts as a firmware testing device when at least one of the
    pools recorded in its host info is listed in
    `_FIRMWARE_REPAIR_POOLS`.  Such a DUT should be managed by
    `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but not by
    `FirmwareVersionVerifier` and `GeneralFirmwareRepair`.

    @param host     The host whose pool assignments are checked.
    @return `True` if the host should use `FirmwareStatusVerifier`
            and `FaftFirmwareRepair`; `False` otherwise.
    """
    shared_pools = host.host_info_store.get().pools & _FIRMWARE_REPAIR_POOLS
    return True if shared_pools else False
Richard Barnette077665e2016-11-29 16:00:59 -080073
74
def _is_firmware_update_supported(host):
    """
    Return whether a DUT should be running the standard firmware.

    DUTs used for general testing in the lab (e.g. the `bvt` pool) have
    their firmware kept current by `FirmwareVersionVerifier`.  Some
    pools follow a different firmware management policy; this function
    reports whether the given DUT is updated through the standard
    stable version update or is left to some other procedure.  The
    answer is simply the opposite of `_is_firmware_testing_device()`.

    @param host     The host to be checked for update policy.
    @return `True` if the host should use `FirmwareVersionVerifier`;
            `False` otherwise.
    """
    if _is_firmware_testing_device(host):
        return False
    return True
Richard Barnette1bf22a32016-11-18 16:14:31 -080091
92
def _get_available_firmware(host, model):
    """Get the available RW firmware version given the model.

    Runs `chromeos-firmwareupdate --manifest` on the host and reads the
    value at `<model>.host.versions.rw` from the JSON it prints.  When
    the manifest has exactly one top-level entry, that entry is used no
    matter what `model` is.

    Any output that cannot be interpreted (a non-zero exit status, text
    that is not JSON, or a manifest without the expected nesting)
    produces `None` rather than an exception.

    @param host     The host to get available firmware for.
    @param model    The model name to get corresponding firmware version.
    @return The available RW firmware version if found, else, None.
    """
    result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)
    if result.exit_status != 0:
        return None

    try:
        manifest = json.loads(result.stdout)
    except ValueError:
        # Not JSON at all; treat the same as "no version available".
        return None
    if not isinstance(manifest, dict) or not manifest:
        return None

    # A manifest with a single entry is used as-is, whatever it is named.
    # `next(iter(...))` works on both Python 2 and Python 3 dictionaries.
    top_key = model if len(manifest) > 1 else next(iter(manifest))

    # Walk the path key by key (rather than splitting a dotted string) so
    # that a '.' inside the model name can't be mistaken for a separator.
    node = manifest
    for name in (top_key, 'host', 'versions', 'rw'):
        if not isinstance(node, dict):
            return None
        node = node.get(name)
    return node or None
Ningning Xia05af7402018-02-13 18:19:10 -0800112
113
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    DUTs that run firmware tests can end up with corrupted firmware.
    This verifier dumps the firmware sections of interest with
    `flashrom`, checks both the A and B copies with `vbutil_firmware`,
    and fails if either copy does not verify, which indicates the
    firmware should be re-flashed using servo.

    Hosts that are not firmware testing devices are skipped.
    """

    @property
    def description(self):
        return 'Firmware on this DUT is clean'

    def verify(self, host):
        if not _is_firmware_testing_device(host):
            return

        workdir = '/tmp/verify_firmware'
        sections = ('VBLOCK_A', 'VBLOCK_B', 'FW_MAIN_A', 'FW_MAIN_B')
        try:
            # Read the AP firmware, writing each section we care about
            # to a file of the same name in the scratch directory.
            dump_cmd = ('mkdir %(dir)s; '
                        'cd %(dir)s; '
                        'for section in %(sections)s; '
                        'do flashrom -p host -r -i $section:$section; '
                        'done'
                        % {'dir': workdir, 'sections': ' '.join(sections)})
            host.run(dump_cmd)

            # Check firmware copy A, then copy B; stop at the first
            # copy that fails verification.
            verify_cmd = (
                    'vbutil_firmware --verify {dir}/VBLOCK_{slot}'
                    ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                    ' --fv {dir}/FW_MAIN_{slot}')
            for slot in 'AB':
                outcome = host.run(verify_cmd.format(dir=workdir, slot=slot),
                                   ignore_status=True)
                if outcome.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % slot)
        finally:
            # The scratch files are removed whether or not the check
            # passed.
            host.run('rm -rf %s' % workdir)
Richard Barnette90ad4262016-11-17 17:29:24 -0800152
153
class FirmwareRepair(hosts.RepairAction):
    """
    Reinstall the firmware image using servo.

    This is the common base for the servo-based firmware repair
    actions.  `repair()` requires a servo, asks the subclass which build
    to install, and hands that build to `host.firmware_install()`.

    Subclasses must override `_get_stable_build()` to choose the build;
    see `FaftFirmwareRepair` and `GeneralFirmwareRepair`.
    """

    def _get_stable_build(self, host):
        """
        Return the build whose firmware should be installed on `host`.

        @param host     The host being repaired.
        @return The build to pass to `host.firmware_install()`; a false
                value means no build could be found.
        @raise NotImplementedError  Always, in this base class.
        """
        raise NotImplementedError('Class %s does not implement '
                                  '_get_stable_build()'
                                  % type(self).__name__)

    def repair(self, host):
        """
        Install the stable firmware build on `host` via servo.

        @param host     The host to be repaired.
        @raise hosts.AutoservRepairError  If no stable build is found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        build = self._get_stable_build(host)
        if not build:
            # Interpolate the hostname into the message here; passing it
            # as a separate argument would leave a literal '%s' in the
            # description.  The second argument is presumably the
            # error's short tag -- confirm against AutoservRepairError.
            raise hosts.AutoservRepairError(
                    'Failed to find stable firmware build for %s.'
                    % host.hostname,
                    'cannot find firmware stable_version')
        host.firmware_install(build)
Richard Barnette077665e2016-11-29 16:00:59 -0800176
Richard Barnette077665e2016-11-29 16:00:59 -0800177
class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pool.

    Applies only to firmware testing devices, and installs the build
    that `afe_utils.get_stable_faft_version_v2()` reports for the
    host's info.
    """

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'

    def _is_applicable(self, host):
        return _is_firmware_testing_device(host)

    def _get_stable_build(self, host):
        host_info = host.host_info_store.get()
        faft_build = afe_utils.get_stable_faft_version_v2(host_info)
        return faft_build
Garry Wangad2a1712020-03-26 15:06:43 -0700192
193
class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    Non firmware testing DUTs get their own RepairAction because we
    only want to try re-installing firmware for them when every other
    RepairAction has failed to restore ssh capability to the DUT.
    """

    @property
    def description(self):
        return 'Re-install the stable firmware(non-faft) via servo'

    def _is_applicable(self, host):
        if _is_firmware_testing_device(host):
            return False
        return True

    def _get_stable_build(self, host):
        # The firmware comes from the current stable OS build.
        repair_image = host.get_cros_repair_image_name()
        return repair_image
Garry Wangad2a1712020-03-26 15:06:43 -0700210
211
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    The verifier is satisfied when the DUT already runs the target
    firmware, or when the target firmware can be installed from the
    currently running build.

    It fails only when every one of the following is true:
      1. The DUT is not excluded from updates.  DUTs used for FAFT
         testing, for example, rely on `FirmwareRepair` instead.
      2. A stable firmware version is assigned to the DUT's board.
      3. The DUT is running something other than that version.
      4. The firmware shipped in the running OS build is not that
         version either.

    When an upgrade is needed and the running OS build does ship the
    required firmware, the verifier installs it with
    `chromeos-firmwareupdate`.  A failed install fails the verifier.

    Strictly speaking this violates the rule that "verifiers must
    succeed quickly", because the success path may call `reboot()`.
    We accept that for two reasons:
      * Updates are typically months apart, so the amortized cost is
        low.
      * Repair is kept separate from verify so that work can be
        rescheduled right away while a costly repair runs out-of-band.
        A firmware update, however, is likely to hit every DUT at the
        same time, so deferring it to repair gains nothing.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the output of `crossystem fwid`, or None if it failed.

        @param host     The host to query.
        """
        fwid = host.run('crossystem fwid', ignore_status=True)
        return fwid.stdout if fwid.exit_status == 0 else None

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        A firmware version string looks like
            Google_Gnawty.5216.239.34
        where the text before the first '.' names the hardware the
        firmware was built for.  The two versions must agree on that
        leading part.

        This guards against installing the wrong firmware on a DUT when
        a board label has somehow gone astray.

        @param version_a    First firmware version for the comparison.
        @param version_b    Second firmware version for the comparison.
        @raise hosts.AutoservVerifyError  If the hardware parts differ.
        """
        hardware_a, _, _ = version_a.partition('.')
        hardware_b, _, _ = version_b.partition('.')
        if hardware_a == hardware_b:
            return
        raise hosts.AutoservVerifyError(
                'Hardware/Firmware mismatch updating %s to %s'
                % (version_a, version_b))

    @staticmethod
    def _lookup_stable_firmware(info):
        """
        Look up the stable firmware version assigned for `info`.

        A failed lookup is logged and reported as None, exactly like a
        lookup that finds no assigned version.

        @param info     The host info used for the lookup.
        @return The stable firmware version, or None.
        """
        try:
            return afe_utils.get_stable_firmware_version_v2(info)
        except Exception as err:
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', err)
        return None

    def _install_update(self, host, running, target):
        """
        Install the firmware shipped in the OS build and confirm it.

        @param host     The host to update.
        @param running  Firmware version the DUT reported before update.
        @param target   Stable firmware version expected after update.
        @raise hosts.AutoservVerifyError  On a hardware mismatch, a
                failed update or reboot, or if the DUT does not report
                `target` afterwards.
        """
        logging.info('Updating firmware from %s to %s', running, target)
        self._check_hardware_match(running, target)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            failure = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(failure, running, target)
            raise hosts.AutoservVerifyError(failure % (running, target))
        installed = self._get_rw_firmware(host)
        if installed != target:
            raise hosts.AutoservVerifyError(
                    'chromeos-firmwareupdate failed: tried upgrade '
                    'to %s, now running %s instead' % (target, installed))

    def verify(self, host):
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return

        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')
        target = self._lookup_stable_firmware(info)
        if target is None:
            # This DUT doesn't have a firmware update target
            return

        # For tests 3 and 4: unexpected output from `crossystem` or
        # `chromeos-firmwareupdate` is logged as an error but does not
        # fail the verifier.  A bug or change in either command must
        # not by itself leave DUTs unable to test a build.

        # Test 3 - The DUT is not running the target stable firmware.
        running = self._get_rw_firmware(host)
        if running is None:
            logging.error("DUT firmware version can't be determined.")
            return
        if running == target:
            return

        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        bundled = _get_available_firmware(host, info.model)
        if bundled is None:
            logging.error("Supplied firmware version in OS can't be "
                          "determined.")
            return
        if bundled != target:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (running, target))

        # Time to update the firmware.
        self._install_update(host, running, target)

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'