Richard Barnette | 90ad426 | 2016-11-17 17:29:24 -0800 | [diff] [blame] | 1 | # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
Richard Barnette | 1bf22a3 | 2016-11-18 16:14:31 -0800 | [diff] [blame] | 5 | """ |
| 6 | Repair actions and verifiers relating to CrOS firmware. |
| 7 | |
| 8 | This contains the repair actions and verifiers needed to find problems |
| 9 | with the firmware installed on Chrome OS DUTs, and when necessary, to |
| 10 | fix problems by updating or re-installing the firmware. |
Richard Barnette | 077665e | 2016-11-29 16:00:59 -0800 | [diff] [blame] | 11 | |
| 12 | The operations in the module support two distinct use cases: |
| 13 | * DUTs used for FAFT tests can in some cases have problems with |
| 14 | corrupted firmware. The module supplies `FirmwareStatusVerifier` |
| 15 | to check for corruption, and supplies `FirmwareRepair` to re-install |
| 16 | firmware via servo when needed. |
| 17 | * DUTs used for general testing normally should be running a |
| 18 | designated "stable" firmware version. This module supplies |
| 19 | `FirmwareVersionVerifier` to detect and automatically update |
| 20 | firmware that is out-of-date from the designated version. |
| 21 | |
| 22 | For purposes of the operations in the module, we distinguish three kinds |
| 23 | of DUT, based on pool assignments: |
| 24 | * DUTs used for general testing. These DUTs automatically check for |
| 25 | and install the stable firmware using `FirmwareVersionVerifier`. |
| 26 | * DUTs in pools used for FAFT testing. These check for bad firmware |
| 27 | builds with `FirmwareStatusVerifier`, and will fix problems using |
| 28 | `FirmwareRepair`. These DUTs don't check for or install the |
| 29 | stable firmware. |
| 30 | * DUTs not in general pools, and not used for FAFT. These DUTs |
| 31 | are expected to be managed by separate processes and are excluded |
| 32 | from all of the verification and repair code in this module. |
Richard Barnette | 1bf22a3 | 2016-11-18 16:14:31 -0800 | [diff] [blame] | 33 | """ |
| 34 | |
Xixuan Wu | 93e646c | 2017-12-07 18:36:10 -0800 | [diff] [blame] | 35 | # pylint: disable=missing-docstring |
| 36 | |
Richard Barnette | 90ad426 | 2016-11-17 17:29:24 -0800 | [diff] [blame] | 37 | import logging |
| 38 | import re |
| 39 | |
| 40 | import common |
Richard Barnette | 1bf22a3 | 2016-11-18 16:14:31 -0800 | [diff] [blame] | 41 | from autotest_lib.client.common_lib import global_config |
Richard Barnette | 90ad426 | 2016-11-17 17:29:24 -0800 | [diff] [blame] | 42 | from autotest_lib.client.common_lib import hosts |
| 43 | from autotest_lib.server import afe_utils |
Richard Barnette | 3245ae2 | 2018-08-31 11:50:08 -0700 | [diff] [blame] | 44 | from autotest_lib.server.hosts import repair_utils |
Richard Barnette | 1bf22a3 | 2016-11-18 16:14:31 -0800 | [diff] [blame] | 45 | |
| 46 | |
# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FirmwareRepair`.
#
# The value is read from the comma-separated setting
# `pools_support_firmware_repair` in the `CROS` section of the global
# config.  Whitespace around each name is stripped and empty entries
# are dropped, so a value written as "pool-a, pool-b" (or one with a
# trailing comma) still produces names that can match a host's pools.
#
_FIRMWARE_REPAIR_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_support_firmware_repair',
                type=str).split(',')
        if pool.strip())
| 55 | |
| 56 | |
def _is_firmware_repair_supported(host):
    """
    Tell whether a host is managed by the firmware repair flow.

    The host qualifies when at least one of the pools recorded in its
    host info also appears in `_FIRMWARE_REPAIR_POOLS`.  Such a DUT
    should be handled by `FirmwareStatusVerifier` and `FirmwareRepair`,
    but not by `FirmwareVersionVerifier`.  In general, this applies to
    DUTs used for firmware testing.

    @param host  The host to be checked; its `host_info_store` is read.
    @return `True` if the host should use `FirmwareStatusVerifier` and
            `FirmwareRepair`; `False` otherwise.
    """
    repair_pools = host.host_info_store.get().pools & _FIRMWARE_REPAIR_POOLS
    return bool(repair_pools)
Richard Barnette | 077665e | 2016-11-29 16:00:59 -0800 | [diff] [blame] | 71 | |
| 72 | |
def _is_firmware_update_supported(host):
    """
    Tell whether a DUT should be kept on the standard firmware.

    In the test lab, DUTs used for general testing (e.g. the `bvt`
    pool) need their firmware kept up-to-date with
    `FirmwareVersionVerifier`.  Some pools follow a different firmware
    management policy instead.  A host is considered eligible for the
    standard stable version update exactly when it is *not* managed by
    the firmware repair flow.

    @param host  The host to be checked for update policy.
    @return A true value if the host should use
            `FirmwareVersionVerifier`; a false value otherwise.
    """
    managed_by_repair = _is_firmware_repair_supported(host)
    return not managed_by_repair
Richard Barnette | 1bf22a3 | 2016-11-18 16:14:31 -0800 | [diff] [blame] | 89 | |
| 90 | |
Ningning Xia | 05af740 | 2018-02-13 18:19:10 -0800 | [diff] [blame] | 91 | def _get_firmware_version(output): |
| 92 | """Parse the output and get the firmware version. |
| 93 | |
| 94 | @param output The standard output of chromeos-firmwareupdate script. |
| 95 | @return Firmware version if found, else, None. |
| 96 | """ |
| 97 | # At one point, the chromeos-firmwareupdate script was updated to |
| 98 | # add "RW" version fields. The old string, "BIOS version:" still |
| 99 | # appears in the new output, however it now refers to the RO |
| 100 | # firmware version. Therefore, we try searching for the new string |
| 101 | # first, "BIOS (RW) version". If that string isn't found, we then |
| 102 | # fallback to searching for old string. |
| 103 | version = re.search(r'BIOS \(RW\) version:\s*(?P<version>.*)', output) |
| 104 | |
| 105 | if not version: |
| 106 | version = re.search(r'BIOS version:\s*(?P<version>.*)', output) |
| 107 | |
| 108 | if version is not None: |
| 109 | return version.group('version') |
| 110 | |
| 111 | return None |
| 112 | |
| 113 | |
def _get_available_firmware(host, model):
    """Look up the firmware version the running OS build can install.

    Runs `chromeos-firmwareupdate -V` on the host.  If the output has
    any blank-line-separated section carrying a "Model:" line, only the
    section whose model equals `model` is consulted.  If no section has
    a "Model:" line at all, the whole output is parsed instead.

    @param host   The host to get available firmware for.
    @param model  The model name to get corresponding firmware version.
    @return The available firmware version if found; otherwise `None`
            (including when the command exits with non-zero status).
    """
    result = host.run('chromeos-firmwareupdate -V', ignore_status=True)
    if result.exit_status != 0:
        return None

    saw_model_section = False
    for section in result.stdout.split('\n\n'):
        found = re.search(r'Model:\s*(?P<model>.*)', section)
        if found is None:
            continue
        saw_model_section = True
        if model == found.group('model'):
            return _get_firmware_version(section)

    if saw_model_section:
        # Per-model sections exist, but none of them is for `model`.
        return None
    return _get_firmware_version(result.stdout)
| 137 | |
| 138 | |
class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    DUTs that run firmware tests can end up with corrupted firmware.
    This verifier checks whether the firmware looks like it needs to
    be re-flashed using servo.  Hosts for which firmware repair is not
    supported are skipped without any check.
    """

    def verify(self, host):
        """
        Dump the firmware sections and validate slots A and B.

        @param host  The host whose firmware is to be checked.
        @raises hosts.AutoservVerifyError if verification of either
                firmware slot exits with a non-zero status.
        """
        if not _is_firmware_repair_supported(host):
            return
        try:
            # Read the AP firmware, writing each section of interest
            # to a file of the same name in a scratch directory.
            dump_cmd = '; '.join([
                'mkdir /tmp/verify_firmware',
                'cd /tmp/verify_firmware',
                'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B',
                'do flashrom -r -i $section:$section',
                'done',
            ])
            host.run(dump_cmd)

            # Check each slot's vblock against its firmware body.
            verify_cmd = ' '.join([
                'vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c',
                '--signpubkey /usr/share/vboot/devkeys/root_key.vbpubk',
                '--fv /tmp/verify_firmware/FW_MAIN_%c',
            ])
            for slot in 'AB':
                result = host.run(verify_cmd % (slot, slot),
                                  ignore_status=True)
                if result.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % slot)
        finally:
            # Always clean up the scratch directory.
            host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'
Richard Barnette | 90ad426 | 2016-11-17 17:29:24 -0800 | [diff] [blame] | 177 | |
| 178 | |
class FirmwareRepair(hosts.RepairAction):
    """
    Reinstall the firmware image using servo.

    This repair action tries to install the DUT's designated
    "stable firmware version" by way of servo.

    It only applies to DUTs used for FAFT; for any other host the
    repair is rejected with an error.
    """

    def repair(self, host):
        """
        Re-install firmware on a host that supports firmware repair.

        @param host  The host to be repaired.
        @raises hosts.AutoservRepairError if firmware repair does not
                apply to this host.
        """
        if _is_firmware_repair_supported(host):
            repair_utils.require_servo(host)
            host.firmware_install()
        else:
            raise hosts.AutoservRepairError(
                    'Firmware repair is not applicable to host %s.' %
                    host.hostname)

    @property
    def description(self):
        return 'Re-install the stable firmware via servo'
| 200 | |
| 201 | |
class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    This verifier checks to ensure that either the firmware on the DUT
    is up-to-date, or that the target firmware can be installed from the
    currently running build.

    Failure occurs when all of the following apply:
      1. The DUT is not excluded from updates.  For example, DUTs used
         for FAFT testing use `FirmwareRepair` instead.
      2. The DUT's model has an assigned stable firmware version.
      3. The DUT is not running the assigned stable firmware.
      4. The firmware supplied in the running OS build is not the
         assigned stable firmware.

    If the DUT needs an upgrade and the currently running OS build
    supplies the necessary firmware, the verifier installs the new
    firmware using `chromeos-firmwareupdate`.  Failure to install will
    cause the verifier to fail.

    This verifier nominally breaks the rule that "verifiers must succeed
    quickly", since it can invoke `reboot()` during the success code
    path.  We're doing it anyway for two reasons:
      * The time between updates will typically be measured in months,
        so the amortized cost is low.
      * The reason we distinguish repair from verify is to allow
        rescheduling work immediately while the expensive repair happens
        out-of-band.  But a firmware update will likely hit all DUTs at
        once, so it's pointless to pass the buck to repair.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the firmware ID reported by `crossystem fwid`.

        Surrounding whitespace is stripped so that a stray newline in
        the command output cannot make an up-to-date DUT look stale.

        @param host  The host to query.
        @return The firmware version string, or `None` if the command
                exits with non-zero status or prints nothing.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status != 0:
            return None
        # Empty output carries no version; report it as undetermined.
        return result.stdout.strip() or None

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        The part before the numbers identifies the hardware for which
        the firmware was built.  This function checks that the hardware
        identified by `version_a` and `version_b` is the same.

        This is a safety check to protect us from installing the wrong
        firmware on a DUT when a board label has somehow gone astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.
        @raises hosts.AutoservVerifyError if the text before the first
                '.' differs between the two versions.
        """
        hardware_a = version_a.split('.')[0]
        hardware_b = version_b.split('.')[0]
        if hardware_a != hardware_b:
            message = 'Hardware/Firmware mismatch updating %s to %s'
            raise hosts.AutoservVerifyError(
                    message % (version_a, version_b))

    def verify(self, host):
        """
        Verify the DUT's firmware version, updating it when possible.

        @param host  The host to be checked and, if needed, updated.
        @raises hosts.AutoservVerifyError if the host has no model, if
                the required firmware isn't available from the running
                build, if the hardware names don't match, or if the
                update fails or doesn't take effect.
        """
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return
        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        stable_firmware = afe_utils.get_stable_firmware_version(info.model)
        if stable_firmware is None:
            # This DUT doesn't have a firmware update target
            return

        # For tests 3 and 4:  If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        if current_firmware == stable_firmware:
            return
        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))
        # Time to update the firmware.
        logging.info('Updating firmware from %s to %s',
                     current_firmware, stable_firmware)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            # Any failure of the update or the reboot is reported as a
            # verify failure; the traceback is kept in the log.
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, current_firmware, stable_firmware)
            raise hosts.AutoservVerifyError(
                    message % (current_firmware, stable_firmware))
        # Confirm that the DUT actually came back on the new firmware.
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'