blob: a41d61124e16913703cd350d98795261022ecc83 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Gabe Blackb72f4fb2015-01-20 16:47:13 -080024from autotest_lib.client.common_lib.cros.graphite import autotest_es
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
Cheng-Yi Chiangf4104ff2014-12-23 19:39:01 +080028from autotest_lib.client.cros.audio import cras_utils
MK Ryu35d661e2014-09-25 17:44:10 -070029from autotest_lib.server import autoserv_parser
30from autotest_lib.server import autotest
31from autotest_lib.server import constants
32from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070033from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070034from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050035from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070036from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070037from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070038from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080039from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070040from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080041from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070042
43
beeps32a63082013-08-22 14:02:29 -070044try:
45 import jsonrpclib
46except ImportError:
47 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070048
Fang Dengd1c2b732013-08-20 12:59:46 -070049
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
53
54
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label-detecting functions.

    @param label_function_list: List that every decorated function is
                                appended to.
    @param label_list: Optional list that collects detectable label names.
                       (Default: None)
    @param label: Label string detectable by the decorated function; it is
                  recorded only when it is non-empty and `label_list` was
                  supplied. (Default: None)

    @returns A decorator that registers its argument and hands it back
             unchanged.
    """
    def register(func):
        """
        @param func: The function to be added as a detector.

        @returns `func` itself, so the decorated name keeps its binding.
        """
        label_function_list.append(func)
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func

    return register
73
74
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    # Shared by all instances: the autoserv argument parser and a retrying
    # AFE RPC client.
    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.
    # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
    #   includes powerwash.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    SHUTDOWN_TIMEOUT = 10
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150
    INSTALL_TIMEOUT = 480
    POWERWASH_BOOT_TIMEOUT = 60

    # REBOOT_TIMEOUT: How long to wait for a reboot.
    #
    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # URL template for an RPC proxy; the single %d placeholder takes a
    # port number on localhost.
    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    # Set shutdown timeout to account for the time for restarting the UI.
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT

    # The 'rpm_recovery_boards' value of the CROS section in the global
    # config, split on commas into a list of strings.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched on the DUT after an update; see
    # _post_update_processing().
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # Pattern for lab hostnames of the form
    # chromeos<N>[-row<N>]-rack<N>[letters]-host<N>.
    _RPM_HOSTNAME_REGEX = ('chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
                           '-host(\d+)')
    # Candidate light sensor file names and the directory they are
    # searched for in.
    _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
                            "in_illuminance_input",
                            "in_illuminance0_raw",
                            "in_illuminance_raw",
                            "illuminance0_input"]
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # label_decorator pre-binds these two lists to add_label_detector(), so
    # functions decorated with it are appended to _LABEL_FUNCTIONS and their
    # labels (when given) to _DETECTABLE_LABELS.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    _RPM_OUTLET_CHANGED = 'outlet_changed'

    # URL pattern to download firmware image, read from the
    # 'firmware_url_pattern' value of the CROS section in the global config.
    _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
            'CROS', 'firmware_url_pattern', type=str)

    # File that has a list of directories to be collected
    _LOGS_TO_COLLECT_FILE = os.path.join(
            common.client_dir, 'common_lib', 'logs_to_collect')

    # Prefix of logging message w.r.t. crash collection
    _CRASHLOGS_PREFIX = 'collect_crashlogs'

    # Time duration waiting for host up/down check
    _CHECK_HOST_UP_TIMEOUT_SECS = 15

    # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
    # might not be completely done deep through the hardware when the machine
    # is powered down right after the command returns.
    # We should wait for a few seconds to let them finish. Fingers crossed.
    _SAFE_WAIT_SECS = 10
193
194
J. Richard Barnette964fba02012-10-24 17:34:29 -0700195 @staticmethod
def check_host(host, timeout=10):
    """
    Probe a device over ssh to decide whether it is a chrome-os host.

    The probe succeeds only when /etc/lsb-release mentions CHROMEOS, no
    `adb` binary is found by `which`, and /etc/lsb-release does not
    mention moblab.

    @param host: An ssh host representing a device.
    @param timeout: The timeout for the run command.

    @return: True if the probe command exits with status 0; False if it
             exits non-zero, or if running it raised AutoservRunError or
             AutoservSSHTimeout.

    """
    probe_cmd = ('grep -q CHROMEOS /etc/lsb-release && '
                 '! which adb >/dev/null 2>&1 && '
                 '! grep -q moblab /etc/lsb-release')
    try:
        probe = host.run(probe_cmd, ignore_status=True, timeout=timeout)
    except (error.AutoservRunError, error.AutoservSSHTimeout):
        return False
    else:
        return probe.exit_status == 0
214
215
216 @staticmethod
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800217 def _extract_arguments(args_dict, key_subset):
218 """Extract options from `args_dict` and return a subset result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800219
220 Take the provided dictionary of argument options and return
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800221 a subset that represent standard arguments needed to construct
222 a test-assistant object (chameleon or servo) for a host. The
223 intent is to provide standard argument processing from
224 run_remote_tests for tests that require a test-assistant board
225 to operate.
226
227 @param args_dict Dictionary from which to extract the arguments.
228 @param key_subset Tuple of keys to extract from the args_dict, e.g.
229 ('servo_host', 'servo_port').
230 """
231 result = {}
232 for arg in key_subset:
233 if arg in args_dict:
234 result[arg] = args_dict[arg]
235 return result
236
237
238 @staticmethod
def get_chameleon_arguments(args_dict):
    """Return the chameleon-related options found in `args_dict`.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
        host = hosts.create_host(machine, chameleon_args=chameleon_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the chameleon
      arguments.

    @returns A dictionary with whichever of 'chameleon_host' and
             'chameleon_port' were present in `args_dict`.
    """
    chameleon_keys = ('chameleon_host', 'chameleon_port')
    return CrosHost._extract_arguments(args_dict, chameleon_keys)
254
255
256 @staticmethod
def get_servo_arguments(args_dict):
    """Return the servo-related options found in `args_dict`.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.

    @returns A dictionary with whichever of 'servo_host' and
             'servo_port' were present in `args_dict`.
    """
    servo_keys = ('servo_host', 'servo_port')
    return CrosHost._extract_arguments(args_dict, servo_keys)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700272
J. Richard Barnette964fba02012-10-24 17:34:29 -0700273
def _initialize(self, hostname, chameleon_args=None, servo_args=None,
                try_lab_servo=False, ssh_verbosity_flag='', ssh_options='',
                *args, **dargs):
    """Initialize superclasses, |self.chameleon|, and |self.servo|.

    The test-assistant host objects (chameleon/servo) are requested from
    chameleon_host.create_chameleon_host and servo_host.create_servo_host;
    see their docstrings for when an object is actually created.
    |self.servo| and |self.chameleon| are set to None when the
    corresponding host object was not created.

    @param hostname: Hostname of the dut.
    @param chameleon_args: A dictionary that contains args for creating
                           a ChameleonHost. See chameleon_host for details.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object. See servo_host for details.
    @param try_lab_servo: Boolean, False indicates that ServoHost should
                          not be created for a device in Cros test lab.
                          See servo_host for details.
    @param ssh_verbosity_flag: String, to pass to the ssh command to control
                               verbosity.
    @param ssh_options: String, other ssh options to pass to the ssh
                        command.
    """
    super(CrosHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env holds environment variable settings exported for commands
    # run on the host. LIBC_FATAL_STDERR_ can be useful for diagnosing
    # certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options

    # TODO(fdeng): We need to simplify the
    # process of servo and servo_host initialization.
    # crbug.com/298432
    self._servo_host = servo_host.create_servo_host(
            dut=self.hostname,
            servo_args=servo_args,
            try_lab_servo=try_lab_servo)
    # TODO(waihong): Do the simplification on Chameleon too.
    self._chameleon_host = chameleon_host.create_chameleon_host(
            dut=self.hostname,
            chameleon_args=chameleon_args)

    # Note the two checks differ on purpose of fidelity: the servo host is
    # compared against None, the chameleon host is tested for truthiness.
    self.servo = (self._servo_host.get_servo()
                  if self._servo_host is not None else None)
    self.chameleon = (self._chameleon_host.create_chameleon_board()
                      if self._chameleon_host else None)
Fang Deng5d518f42013-08-02 14:04:32 -0700326
327
def get_repair_image_name(self):
    """Build the repair image name from the AFE and the global config.

    The board comes from self._get_board_from_afe(), the version from the
    AFE 'get_stable_version' RPC, and both are substituted into the
    'stable_build_pattern' value of the CROS config section.

    @returns a str built from the configured pattern, board and stable
             version.

    @raises AutoservError if the DUT has no board attribute.

    """
    dut_board = self._get_board_from_afe()
    if dut_board is None:
        raise error.AutoservError('DUT has no board attribute, '
                                  'cannot be repaired.')
    version = self._AFE.run('get_stable_version', board=dut_board)
    config = global_config.global_config
    pattern = config.get_config_value('CROS', 'stable_build_pattern')
    return pattern % (dut_board, version)
342
343
Scott Zawalski62bacae2013-03-05 10:40:32 -0500344 def _host_in_AFE(self):
345 """Check if the host is an object the AFE knows.
346
347 @returns the host object.
348 """
349 return self._AFE.get_hosts(hostname=self.hostname)
350
351
def lookup_job_repo_url(self):
    """Looks up the job_repo_url attribute of this host in the AFE.

    @returns the job_repo_url from the AFE, or None if the host is not
             known to the AFE or has no job_repo_url attribute.
    """
    if not self._host_in_AFE():
        return None

    # NOTE(review): this repeats the query _host_in_AFE() just made;
    # kept as-is to preserve the existing RPC sequence.
    known_hosts = self._AFE.get_hosts(hostname=self.hostname)
    if not known_hosts:
        return None

    attributes = known_hosts[0].attributes
    if ds_constants.JOB_REPO_URL not in attributes:
        return None
    return attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700365
366
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host. Otherwise the host
    is removed from every label whose name starts with the version prefix,
    and the job_repo_url attribute is reset to None.
    """
    if self._host_in_AFE():
        targets = [self.hostname]
        version_labels = self._AFE.get_labels(
                name__startswith=ds_constants.VERSION_PREFIX,
                host__hostname=self.hostname)
        for version_label in version_labels:
            version_label.remove_hosts(hosts=targets)

        self.update_job_repo_url(None, None)
381
382
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    When either argument is empty the attribute is set to None. After the
    write, the attribute is read back through lookup_job_repo_url() and
    compared with the value that was written.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    stored_url = self.lookup_job_repo_url()
    if stored_url != repo_url:
        raise error.AutoservError('Failed to update job_repo_url with %s, '
                                  'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500400
401
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host. Otherwise the
    version label for `image_name` is looked up (and created if missing),
    this host is added to it, and job_repo_url is updated to point at the
    devserver resolved for the image.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=cros_label)
    version_label = (existing[0] if existing
                     else self._AFE.create_label(name=cros_label))

    version_label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
423
424
def verify_job_repo_url(self, tag=''):
    """
    Make sure the autotest packages named by job_repo_url are staged.

    Eg: a job_repo_url of
    "http://lmn.cd.ab.xyx:8080/static/lumpy-release/R29-4279.0.0/autotest/packages"
    claims to have the autotest package for lumpy-release/R29-4279.0.0.
    The devserver and build are parsed out of the url and the devserver is
    asked to stage the 'autotest_packages' artifact for that build. The
    time the staging call took is reported as a stat when the build name
    can be parsed.

    If the host has no job_repo_url a warning is logged and nothing else
    happens.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Not used by the code in this method.

    Exceptions raised by the devserver calls are not caught here and
    propagate to the caller.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
            job_repo_url)

    image_server = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, image_server.url())

    staging_began = time.time()
    image_server.stage_artifacts(image_name, ['autotest_packages'])
    stage_time = time.time() - staging_began

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = server_utils.ParseBuildName(
                image_name)[:3]
    except server_utils.ParseBuildNameException:
        # Without a parsable build name there is no stats key to report.
        return

    # Slice out the text between the '//' after the scheme and the last
    # ':' of the devserver url.
    name_start = devserver_url.find('/') + 2
    name_end = devserver_url.rfind(':')
    devserver = devserver_url[name_start:name_end]
    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        'devserver': devserver.replace('.', '_'),
    }
    autotest_stats.Gauge('verify_job_repo_url').send(
            '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
            stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700486
Scott Zawalskieadbf702013-03-14 09:23:06 -0400487
def stage_server_side_package(self, image=None):
    """Stage autotest server-side package on devserver.

    The build to stage is chosen from the first of these that is
    available:
      1. the `image` argument,
      2. the host's job_repo_url attribute,
      3. the first version label the AFE reports for this host.

    @param image: Full path of an OS image to install or a build name.

    @return: A url to the autotest server-side package.

    @raises AutoservError: If a build name cannot be parsed from `image`,
            or if no `image` is given and the host has neither a
            job_repo_url attribute nor a version label.
    """
    if image:
        image_name = tools.get_build_from_image(image)
        if not image_name:
            raise error.AutoservError(
                    'Failed to parse build name from %s' % image)
        ds = dev_server.ImageServer.resolve(image_name)
    else:
        job_repo_url = self.lookup_job_repo_url()
        if job_repo_url:
            # Reuse the devserver the host's packages already live on.
            devserver_url, image_name = (
                tools.get_devserver_build_from_package_url(job_repo_url))
            ds = dev_server.ImageServer(devserver_url)
        else:
            labels = self._AFE.get_labels(
                    name__startswith=ds_constants.VERSION_PREFIX,
                    host__hostname=self.hostname)
            if not labels:
                # The attribute is named job_repo_url; the message used to
                # refer to a nonexistent 'job_report_url'.
                raise error.AutoservError(
                        'Failed to stage server-side package. The host has '
                        'no job_repo_url attribute or version label.')
            # Strip the version prefix to recover the build name.
            image_name = labels[0].name[len(ds_constants.VERSION_PREFIX):]
            ds = dev_server.ImageServer.resolve(image_name)
    ds.stage_artifacts(image_name, ['autotest_server_package'])
    return '%s/static/%s/%s' % (ds.url(), image_name,
                                'autotest_server_package.tar.bz2')
520
521
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    A marker file is dropped into each folder that the stateful update is
    expected to rebuild. After the update and reboot, a marker that still
    exists means its folder was not rebuilt, and the update is reported
    as not done.

    @param update_url: url of the image.
    @param force_update: Set to True to update the image even if the DUT
                         is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    # TODO(jrbarnette): Yes, I hate this re.match() test case.
    # It's better than the alternative: see crbug.com/360944.
    image_name = autoupdater.url_to_image_name(update_url)
    release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
    if not re.match(release_pattern, image_name):
        return False
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False
    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
    # The check used to run 'test -f %s; echo $?', whose exit status is
    # always that of `echo`, so the verification could never fail. Run
    # `test -f` on its own so its exit status is what gets inspected.
    # NOTE(review): a surviving marker is treated as failure, following
    # the "to be deleted" intent above - confirm against the updater.
    for marker_path in marker_paths:
        result = self.run('test -f %s' % marker_path, ignore_status=True)
        if result.exit_status == 0:
            return False
    return True
572
573
J. Richard Barnette7275b612013-06-04 18:13:11 -0700574 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800575 """After the DUT is updated, confirm machine_install succeeded.
576
577 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700578 @param expected_kernel: kernel expected to be active after reboot,
579 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800580
581 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700582 # Touch the lab machine file to leave a marker that
583 # distinguishes this image from other test images.
584 # Afterwards, we must re-run the autoreboot script because
585 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800586 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800587 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700588 updater.verify_boot_expectations(
589 expected_kernel, rollback_message=
590 'Build %s failed to boot on %s; system rolled back to previous'
591 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700592 # Check that we've got the build we meant to install.
593 if not updater.check_version_to_confirm_install():
594 raise autoupdater.ChromiumOSError(
595 'Failed to update %s to build %s; found build '
596 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700597 updater.update_version,
598 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800599
600
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    When no image name is given, the repair image name from
    get_repair_image_name() is used. The download is triggered on the
    resolved devserver with synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    image_name = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    image_server.trigger_download(image_name, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
614
615
def stage_image_for_servo(self, image_name=None):
    """Stage the test image of a build on a devserver; return its URL.

    Only the 'test_image' artifact is staged.

    @param image_name: a name like lumpy-release/R27-3837.0.0.  When it
            is empty or `None`, the value of
            `self.get_repair_image_name()` is staged instead.
    @returns the value of the devserver's `get_test_image_url()` for
            the staged build.
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)
    image_server = dev_server.ImageServer.resolve(build)
    image_server.stage_artifacts(build, ['test_image'])
    test_image_url = image_server.get_test_image_url(build)
    return test_image_url
629
630
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory image on a devserver and return its URL.

    The artifact to stage is read from the `factory_artifact` option of
    the `CROS` section of the global config.

    @param image_name: a name like <board>/4262.204.0

    @return: A URL to the staged factory image, eg:
        http://<devserver>/static/canary-channel/<board>/4262.204.0/
        factory_test/chromiumos_factory_image.bin
        `None` is returned, after logging an error, when `image_name`
        is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return None

    artifact_name = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not artifact_name:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    image_server.stage_artifacts(image_name, [artifact_name],
                                 archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
663
664
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False,
                    force_full_update=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).  They are added back only when an update was
    actually applied.

    Regardless of whether an update was applied, the autotest
    directory installed on the DUT (if any) is removed at the end.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None, the build given by
            `self._parser.options.image` is used when set (directly
            if it starts with 'http://', staged on a devserver
            otherwise).  If that is not set either and repair is True
            we will install the stable image listed in
            afe_stable_versions table. If the table is not setup,
            global_config value under CROS.stable_cros_version will
            be used instead.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @param force_full_update: If True, do not attempt to run stateful
            update, force a full reimage. If False, try stateful update
            first when the dut is already installed with the same version.
    @raises autoupdater.ChromiumOSError if no update URL can be
            determined, or if after a full update the inactive kernel
            has a lower priority than the active one.

    """
    # Step 1: settle on the update URL.
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            # NOTE(review): only 'http://' is recognized as a URL here;
            # an 'https://' value would be treated as a build name and
            # staged - confirm this is intended.
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            # No build requested: stage the repair image.
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    # Step 2: in repair mode, bring the DUT to a known state first.
    if repair:
        # In case the system is in a bad state, we always reboot
        # the machine before trying to repair.
        #
        # If Chrome is crashing, the ui-respawn job may reboot
        # the DUT to try and "fix" it.  Guard against that
        # behavior by stopping the 'ui' job.
        #
        # If Chrome failed to start, update-engine won't be running,
        # so restart it by force.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop ui || true')
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    if not force_full_update:
        try:
            updated = self._try_stateful_update(
                    update_url, force_update, updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as e:
            # Any failure here is logged and deliberately swallowed:
            # `updated` stays False, so the full update below runs.
            logging.exception(e)
            logging.warning('Failed to stateful update DUT, force to update.')

    # Stays None unless a full update is applied; it is handed to
    # _post_update_processing() as the kernel expected after reboot.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('mkdir /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    # Step 3: verify the result and restore the labels removed above.
    if updated:
        self._post_update_processing(updater, inactive_kernel)
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    logging.debug('Cleaning up old autotest directories.')
    try:
        installed_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + installed_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700795
796
def _clear_fw_version_labels(self):
    """Detach this host from every firmware version label in the AFE.

    The labels are looked up by the `provision.FW_VERSION_PREFIX` name
    prefix, restricted to the ones attached to this host.
    """
    fw_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    for fw_label in fw_labels:
        fw_label.remove_hosts(hosts=[self.hostname])
804
805
def _add_fw_version_label(self, build):
    """Attach the firmware version label for `build` to this host.

    The label is created first if it does not exist yet.

    @param build: Build of firmware.

    """
    label_name = provision.fw_version_to_label(build)
    provision.ensure_label_exists(label_name)
    matching_labels = self._AFE.get_labels(name__startswith=label_name)
    matching_labels[0].add_hosts([self.hostname])
816
817
def firmware_install(self, build=None):
    """Install firmware to the DUT.

    The firmware artifact of `build` is staged on a devserver, the
    firmware tarball is downloaded into a temporary directory, and the
    EC and AP (BIOS) images extracted from it are programmed through
    servo.  The DUT is then reset and given `servo.BOOT_DELAY` seconds
    to boot.

    Once a host enters firmware_install its fw_version label will be
    removed. After the firmware is updated successfully, a new fw_version
    label will be added to the host.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.  If not set,
                  repair mode is assumed and the firmware is taken
                  from `self.get_repair_image_name()`.

    @raises error.TestError if the host has no servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else: # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # A failure to extract the dts files is tolerated
        # (ignore_status=True).
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # _add_fw_version_label() requires the build; calling it with
        # no argument raised TypeError after a successful flash and
        # left the host without a fw_version label.
        self._add_fw_version_label(build)
    finally:
        tmpd.clean()
884
885
def show_update_engine_log(self):
    """Dump the update engine log of the DUT by running `cat` on it."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
Dan Shi10e992b2013-08-30 11:02:59 -0700890
891
def _get_board_from_afe(self):
    """Look up this host's board through its labels in the AFE.

    Delegates to `server_utils.get_board_from_afe()`.  Per the
    original documentation, a host label of the form "board:<board>"
    is searched for and the "<board>" part is returned; `None` is
    returned if there is not a single, unique matching label.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800902
903
def get_build(self):
    """Look up the current build of this Host through the AFE.

    Delegates to `server_utils.get_build_from_afe()`, which per the
    original documentation inspects this host's labels in the AFE.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800913
914
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500915 def _install_repair(self):
916 """Attempt to repair this host using upate-engine.
917
918 If the host is up, try installing the DUT with a stable
Dan Shi6964fa52014-12-18 11:04:27 -0800919 "repair" version of Chrome OS as defined in afe_stable_versions table.
920 If the table is not setup, global_config value under
921 CROS.stable_cros_version will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500922
Scott Zawalski62bacae2013-03-05 10:40:32 -0500923 @raises AutoservRepairMethodNA if the DUT is not reachable.
924 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500925
926 """
927 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500928 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500929 logging.info('Attempting to reimage machine to repair image.')
930 try:
931 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700932 except autoupdater.ChromiumOSError as e:
933 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500934 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500935 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500936
937
Dan Shi2c88eed2013-11-12 10:18:38 -0800938 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800939 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800940
Dan Shi9cc48452013-11-12 12:39:26 -0800941 update-engine may fail due to a bad image. In such case, powerwash
942 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800943
944 @raises AutoservRepairMethodNA if the DUT is not reachable.
945 @raises ChromiumOSError if the install failed for some reason.
946
947 """
948 if not self.is_up():
949 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
950
951 logging.info('Attempting to powerwash the DUT.')
952 self.run('echo "fast safe" > '
953 '/mnt/stateful_partition/factory_install_reset')
954 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
955 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800956 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800957 'reboot.')
958 raise error.AutoservRepairFailure(
959 'DUT failed to boot from powerwash after %d seconds' %
960 self.POWERWASH_BOOT_TIMEOUT)
961
962 logging.info('Powerwash succeeded.')
963 self._install_repair()
964
965
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """
    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode,
    3) installing the image with chromeos-install, and then
    4) power cycling the DUT through servo so it boots the installed
            image.

    The USB boot phase and the install phase are each timed with an
    `autotest_stats.Timer` whose key embeds the timeout in use.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.

    """
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    usb_boot_timer = autotest_stats.Timer(
            'servo_install.usb_boot_timeout_%s' % usb_boot_timeout)
    usb_boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    usb_boot_timer.stop()

    install_timer = autotest_stats.Timer(
            'servo_install.install_timeout_%s' % install_timeout)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes', timeout=install_timeout)
    self.run('halt')
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.get_power_state_controller().power_off()
    self.servo.switch_usbkey('off')
    # N.B. The Servo API requires that we use power_on() here
    # for two reasons:
    #  1) After turning on a DUT in recovery mode, you must turn
    #     it off and then on with power_on() once more to
    #     disable recovery mode (this is a Parrot specific
    #     requirement).
    #  2) After power_off(), the only way to turn on is with
    #     power_on() (this is a Storm specific requirement).
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
1027
1028
J. Richard Barnettee4af8b92013-05-01 13:16:12 -07001029 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -05001030 """Reinstall the DUT utilizing servo and a test image.
1031
1032 Re-install the OS on the DUT by:
1033 1) installing a test image on a USB storage device attached to the Servo
1034 board,
1035 2) booting that image in recovery mode, and then
1036 3) installing the image with chromeos-install.
1037
Scott Zawalski62bacae2013-03-05 10:40:32 -05001038 @raises AutoservRepairMethodNA if the device does not have servo
1039 support.
1040
1041 """
1042 if not self.servo:
1043 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
1044 'DUT has no servo support.')
1045
1046 logging.info('Attempting to recovery servo enabled device with '
1047 'servo_repair_reinstall')
1048
J. Richard Barnettee4af8b92013-05-01 13:16:12 -07001049 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001050 self.servo_install(image_url)
1051
1052
1053 def _servo_repair_power(self):
1054 """Attempt to repair DUT using an attached Servo.
1055
1056 Attempt to power on the DUT via power_long_press.
1057
1058 @raises AutoservRepairMethodNA if the device does not have servo
1059 support.
1060 @raises AutoservRepairFailure if the repair fails for any reason.
1061 """
1062 if not self.servo:
1063 raise error.AutoservRepairMethodNA('Repair Power NA: '
1064 'DUT has no servo support.')
1065
1066 logging.info('Attempting to recover servo enabled device by '
1067 'powering it off and on.')
1068 self.servo.get_power_state_controller().power_off()
1069 self.servo.get_power_state_controller().power_on()
1070 if self.wait_up(self.BOOT_TIMEOUT):
1071 return
1072
1073 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001074
1075
Richard Barnette82c35912012-11-20 10:09:10 -08001076 def _powercycle_to_repair(self):
1077 """Utilize the RPM Infrastructure to bring the host back up.
1078
1079 If the host is not up/repaired after the first powercycle we utilize
1080 auto fallback to the last good install by powercycling and rebooting the
1081 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001082
1083 @raises AutoservRepairMethodNA if the device does not support remote
1084 power.
1085 @raises AutoservRepairFailure if the repair fails for any reason.
1086
Richard Barnette82c35912012-11-20 10:09:10 -08001087 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001088 if not self.has_power():
1089 raise error.AutoservRepairMethodNA('Device does not support power.')
1090
Richard Barnette82c35912012-11-20 10:09:10 -08001091 logging.info('Attempting repair via RPM powercycle.')
1092 failed_cycles = 0
1093 self.power_cycle()
1094 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1095 failed_cycles += 1
1096 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001097 raise error.AutoservRepairFailure(
1098 'Powercycled host %s %d times; device did not come back'
1099 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001100 self.power_cycle()
1101 if failed_cycles == 0:
1102 logging.info('Powercycling was successful first time.')
1103 else:
1104 logging.info('Powercycling was successful after %d failures.',
1105 failed_cycles)
1106
1107
MK Ryu35d661e2014-09-25 17:44:10 -07001108 def _reboot_repair(self):
1109 """SSH to this host and reboot."""
1110 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1111 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1112 logging.info('Attempting repair via SSH reboot.')
1113 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1114
1115
def check_device(self):
    """Check if a device is ssh-able, and if so, clean and verify it.

    Runs, in order: `ssh_ping()`, `cleanup()` and `verify()`; the first
    step that raises aborts the remaining ones.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    for step in (self.ssh_ping, self.cleanup, self.verify):
        step()
1129
1130
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    It first tries to repair the servo host when one is configured
    but no servo object is available yet; a failure there is only
    logged.

    This escalates in order through the following procedures and verifies
    the status using `self.check_device()` after each of them. This is done
    until both the repair and the verifying step succeed.

    Escalation order of repair procedures from less intrusive to
    more intrusive repairs:
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try the auto-update re-install
         again.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    Crash logs are collected (best effort) before the first procedure
    and after each procedure that completes.  Every outcome is
    counted in autotest_stats, both overall and per board when the
    board is known.

    @raises AutoservRepairTotalFailure if the repair process fails to
                                       fix the DUT.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as e:
            logging.error('Could not create a healthy servo: %s', e)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._reboot_repair,
                    self._servo_repair_power,
                    self._powercycle_to_repair,
                    self._install_repair,
                    self._install_repair_with_powerwash,
                    self._servo_repair_reinstall]
    errors = []
    board = self._get_board_from_afe()

    def count_outcome(func_name, outcome):
        """Bump the overall and, if known, the per-board counter."""
        autotest_stats.Counter(
                '%s.%s' % (func_name, outcome)).increment()
        if board:
            autotest_stats.Counter(
                    '%s.%s.%s' % (func_name, board, outcome)).increment()

    for repair_func in repair_funcs:
        func_name = repair_func.__name__
        try:
            repair_func()
            self.try_collect_crashlogs()
            self.check_device()
            count_outcome(func_name, 'SUCCEEDED')
            return
        except error.AutoservRepairMethodNA as e:
            count_outcome(func_name, 'RepairNA')
            logging.warning('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            count_outcome(func_name, 'FAILED')
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    # Every procedure was either not applicable or failed.
    autotest_stats.Counter('Full_Repair_Failed').increment()
    if board:
        autotest_stats.Counter(
                'Full_Repair_Failed.%s' % board).increment()
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001230
1231
def try_collect_crashlogs(self, check_host_up=True):
    """
    Collect logs from the host if a crashed job left them pending.

    Nothing is done unless `_need_crash_logs()` reports a job.  When
    `check_host_up` is set, the logs are only collected if the host is
    up within _CHECK_HOST_UP_TIMEOUT_SECS.  Any exception is logged
    and suppressed.

    @param check_host_up: Flag for checking host is up. Default is True.
    """
    try:
        crash_job = self._need_crash_logs()
        if not crash_job:
            return
        logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
                      crash_job)
        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return
        self._collect_crashlogs(crash_job)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', self._CRASHLOGS_PREFIX,
                      crash_job)
    except Exception as e:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, e)
1255
1256
def _need_crash_logs(self):
    """Look up this host's need_crash_logs attribute in the AFE.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    matches = self._AFE.get_host_attribute(
            constants.CRASHLOGS_HOST_ATTRIBUTE, hostname=self.hostname)
    # At most one such attribute is expected per host.
    assert len(matches) < 2
    if not matches:
        return None
    return matches[0].value
1267
1268
def _collect_crashlogs(self, job_id):
    """Grab logs from the host where a job was crashed.

    If PRIOR_LOGS_DIR exists on the host, its content is collected.
    Otherwise, if the lab-machine marker (_LAB_MACHINE_FILE) exists, the
    host was repaired automatically and the normal system logs are
    collected.  If neither exists nothing is collected.  In every case a
    marker file carrying the job id is written and the need_crash_logs
    host attribute is cleared.

    @param job_id: Id of the job that was crashed.
    """
    dest_dir = crashcollect.get_crashinfo_dir(
            self, constants.CRASHLOGS_DEST_DIR_PREFIX)

    has_prior_logs = self.path_exists(client_constants.PRIOR_LOGS_DIR)
    if has_prior_logs:
        self._collect_prior_logs(dest_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(dest_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # We make crash collection be one-time effort.
    # _collect_prior_logs() and _collect_system_logs() will not throw
    # any exception, and the following code will be executed even when
    # those methods fail.
    # _collect_crashlogs() is called only when the host is up (refer
    # to try_collect_crashlogs()). We assume _collect_prior_logs() and
    # _collect_system_logs() fail rarely when the host is up.
    # In addition, it is not clear how many times we should try crash
    # collection again while not triggering next repair unnecessarily.
    # Therefore, we try crash collection one time.

    # Create a marker file as soon as log collection is done.
    # Leave the job id in this marker for gs_offloader to consume.
    marker_path = os.path.join(dest_dir, constants.CRASHLOGS_MARKER)
    with open(marker_path, 'a') as marker:
        marker.write('%s\n' % job_id)

    # Remove need_crash_logs attribute
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if not has_prior_logs:
        return

    logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                  client_constants.PRIOR_LOGS_DIR, self.hostname)
    self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
    # Wait for a few seconds to make sure the prior command is
    # done deep through storage.
    time.sleep(self._SAFE_WAIT_SECS)
1325
1326
def _collect_prior_logs(self, crashlogs_dir):
    """Grab prior logs that were stashed before re-installing a host.

    Failures are logged, never raised.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prior_dir = client_constants.PRIOR_LOGS_DIR
    logging.debug('%s: Found %s, collecting them...',
                  self._CRASHLOGS_PREFIX, prior_dir)
    try:
        self.collect_logs(prior_dir, crashlogs_dir, False)
        logging.debug('%s: %s is collected',
                      self._CRASHLOGS_PREFIX, prior_dir)
    except Exception as err:
        logging.error('%s: Failed to collect %s: %s',
                      self._CRASHLOGS_PREFIX, prior_dir, err)
1343
1344
def _collect_system_logs(self, crashlogs_dir):
    """Grab normal system logs from a host.

    The list of sources is read from _LOGS_TO_COLLECT_FILE.  Sources
    missing on the host are skipped; a failure on one source is logged
    and does not stop collection of the remaining ones.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    logging.debug('%s: Found %s, collecting system logs...',
                  self._CRASHLOGS_PREFIX, self._LAB_MACHINE_FILE)
    log_sources = server_utils.parse_simple_config(
            self._LOGS_TO_COLLECT_FILE)
    for src in log_sources:
        try:
            if not self.path_exists(src):
                continue
            logging.debug('%s: Collecting %s...',
                          self._CRASHLOGS_PREFIX, src)
            dest = server_utils.concat_path_except_last(
                    crashlogs_dir, src)
            self.collect_logs(src, dest, False)
            logging.debug('%s: %s is collected',
                          self._CRASHLOGS_PREFIX, src)
        except Exception as err:
            logging.error('%s: Failed to collect %s: %s',
                          self._CRASHLOGS_PREFIX, src, err)
1366
1367
def close(self):
    """Disconnect every known RPC proxy, then run the parent close()."""
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001371
1372
def get_power_supply_info(self):
    """Get the output of power_supply_info.

    power_supply_info outputs the info of each power supply, e.g.,
    Device: Line Power
      online:                  no
      type:                    Mains
      voltage (V):             0
      current (A):             0
    Device: Battery
      state:                   Discharging
      percentage:              95.9276
      technology:              Li-ion

    Above output shows two devices, Line Power and Battery, with details of
    each device listed. This function parses the output into a dictionary,
    with key being the device name, and value being a dictionary of details
    of the device info.

    Each line is split on its first colon only, so a value that itself
    contains a colon is kept intact.  Lines without a colon are ignored.

    @return: The dictionary of power_supply_info, e.g.,
             {'Line Power': {'online': 'yes', 'type': 'main'},
              'Battery': {'vendor': 'xyz', 'percentage': '100'}}
    @raise error.AutoservRunError if power_supply_info tool is not found in
           the DUT. Caller should handle this error to avoid false failure
           on verification.
    """
    result = self.run('power_supply_info').stdout.strip()
    info = {}
    device_name = None
    device_info = {}
    for line in result.split('\n'):
        key, separator, value = line.partition(':')
        if not separator:
            continue
        key = key.strip()
        value = value.strip()
        if key == 'Device':
            # A new device section starts; store the one just finished.
            if device_name:
                info[device_name] = device_info
            device_name = value
            device_info = {}
        else:
            device_info[key] = value
    # Store the last section, which has no following 'Device' line.
    if device_name and device_name not in info:
        info[device_name] = device_info
    return info
1417
1418
def get_battery_percentage(self):
    """Get the battery percentage.

    @return: The battery level as a float parsed from the 'percentage'
             entry of the 'Battery' device.  Return None if the battery
             info cannot be retrieved or parsed.
    """
    try:
        supply_info = self.get_power_supply_info()
        logging.info(supply_info)
        percentage = supply_info['Battery']['percentage']
        return float(percentage)
    except (KeyError, ValueError, error.AutoservRunError):
        return None
1431
1432
def is_ac_connected(self):
    """Check if the dut has power adapter connected and charging.

    @return: True if the 'Line Power' device reports online 'yes',
             False if it reports anything else, None if the power
             supply info cannot be retrieved.
    """
    try:
        line_power = self.get_power_supply_info()['Line Power']
        return line_power['online'] == 'yes'
    except (KeyError, error.AutoservRunError):
        return None
Dan Shi49ca0932014-11-14 11:22:27 -08001443
1444
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the host carries the _RPM_OUTLET_CHANGED
    attribute in the AFE.  Otherwise powers the host on and clears the
    attribute.  If powering on fails, the failure is re-raised only when
    the battery level is known and below 50%; in any other case the
    failure is logged and swallowed.

    @raise rpm_client.RemotePowerException if power-on failed and the
           battery level is known to be below 50%.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts or self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return
    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
        afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                               hostname=self.hostname)
    except rpm_client.RemotePowerException:
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
        autotest_es.post(
                type_str='RPM_poweron_failure',
                metadata={'hostname': self.hostname})

        battery_percentage = self.get_battery_percentage()
        # Compare against None explicitly: a reading of 0.0 is a known
        # (and critically low) level, not an unknown one.
        if battery_percentage is not None and battery_percentage < 50:
            raise
        elif self.is_ac_connected():
            logging.info('The device has power adapter connected and '
                         'charging. No need to try to turn RPM on '
                         'again.')
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        logging.info('Battery level is now at %s%%. The device may '
                     'still have enough power to run test, so no '
                     'exception will be raised.', battery_percentage)
1477
Simran Basi5e6339a2013-03-21 11:34:32 -07001478
beepsc87ff602013-07-31 21:53:00 -07001479 def _is_factory_image(self):
1480 """Checks if the image on the DUT is a factory image.
1481
1482 @return: True if the image on the DUT is a factory image.
1483 False otherwise.
1484 """
1485 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1486 return result.exit_status == 0
1487
1488
def _restart_ui(self):
    """Restart the Chrome UI.

    Stops and starts the `ui` job on the DUT, then waits for Chrome to
    become ready again.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette): The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).
    # The login prompt state is captured before the restart and handed
    # to wait_for_chrome_ready() afterwards (presumably so it can detect
    # that the state changed -- confirm against cros_ui).
    prompt = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(prompt, self)
beepsc87ff602013-07-31 21:53:00 -07001508
1509
def cleanup(self):
    """Clean up the DUT after a job.

    Removes the CLEANUP_LOGS_PAUSED_FILE marker, then tries to restart
    the UI.  If the UI cannot be restarted, falls back to the parent
    class cleanup.  Finally, if the host has power control, makes sure
    power is restored via _cleanup_poweron().
    """
    self.run('rm -f %s' % client_constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()
    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001523
1524
def reboot(self, **dargs):
    """
    This function reboots the site host. The more generic
    RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    Unless the caller supplies them, a default 'reboot_cmd' (a normal
    reboot followed by a forced one after 'reboot_timeout' seconds,
    default 10) and 'fastsync'=True are filled in.  'board' is always
    set from get_board().

    @param dargs: Keyword arguments forwarded to the parent reboot().
    """
    if 'reboot_cmd' not in dargs:
        force_delay = dargs.get('reboot_timeout', 10)
        dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
                               ' </dev/null >/dev/null 2>&1 &)' %
                               force_delay)
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times:
    # Get the board name i.e. 'daisy_spring'
    board_fullname = self.get_board()

    # Strip the prefix (everything up to the first ':') and add it to
    # dargs.
    dargs['board'] = board_fullname.split(':', 1)[-1]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001549
1550
def suspend(self, **dargs):
    """
    This function suspends the site host.

    'timeout' is always set to the suspend time ('suspend_time', default
    60).  Unless the caller supplies 'suspend_cmd', a command is built
    that programs the rtc0 wake alarm for that many seconds and then
    calls powerd_dbus_suspend.

    @param dargs: Keyword arguments forwarded to the parent suspend().
    """
    suspend_time = dargs.get('suspend_time', 60)
    dargs['timeout'] = suspend_time
    if 'suspend_cmd' not in dargs:
        steps = [
                'echo 0 > /sys/class/rtc/rtc0/wakealarm',
                'echo +%d > /sys/class/rtc/rtc0/wakealarm' % suspend_time,
                'powerd_dbus_suspend --delay=0 &',
        ]
        cmd = ' && '.join(steps)
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' % cmd)
    super(CrosHost, self).suspend(**dargs)
1564
1565
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    # grep exits non-zero when the service is not in start/running; that
    # is the normal "not running" answer, so the exit status must not
    # make run() raise.
    result = self.run('status %s | grep start/running' % service_name,
                      ignore_status=True)
    return result.stdout.strip() != ''
1575
1576
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. No crash logs are pending collection for this host.
     2. All conditions tested by the parent version of this
        function.
     3. Sufficient inodes and space in /mnt/stateful_partition.
     4. Sufficient space in /mnt/stateful_partition/encrypted
        (when that path exists).
     5. The system-services upstart job is running.
     6. update_engine answers a simple status request (skipped on
        factory images).
     7. python on the DUT can import cPickle.

    """
    # Check if a job was crashed on this host.
    # If yes, avoid verification until crash-logs are collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()

    get_value = global_config.global_config.get_config_value
    stateful_path = '/mnt/stateful_partition'

    kilo_inodes = get_value('SERVER', 'kilo_inodes_required', type=int,
                            default=100)
    self.check_inodes(stateful_path, kilo_inodes)

    gb_required = get_value('SERVER', 'gb_diskspace_required', type=float,
                            default=20.0)
    self.check_diskspace(stateful_path, gb_required)

    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    # Not all targets build with encrypted stateful support.
    if self.path_exists(encrypted_stateful_path):
        gb_encrypted = get_value('SERVER',
                                 'gb_encrypted_diskspace_required',
                                 type=float, default=0.1)
        self.check_diskspace(encrypted_stateful_path, gb_encrypted)

    if not self.upstart_status('system-services'):
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001626
1627
def verify_hardware(self):
    """Verify hardware system of a Chrome OS system.

    Check following hardware conditions:
    1. Battery level.
    2. Is power adapter connected.

    Both are only logged; nothing is raised based on their values.
    """
    logging.info('Battery percentage: %s', self.get_battery_percentage())
    # Query once: each call runs a command on the DUT, and two calls
    # could return different answers.
    ac_connected = self.is_ac_connected()
    if ac_connected is None:
        logging.info('Can not determine if the device has power adapter '
                     'connected.')
    else:
        logging.info('Device %s power adapter connected and charging.',
                     'has' if ac_connected else 'does not have')
Dan Shi49ca0932014-11-14 11:22:27 -08001642
1643
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line, without the target host name.
    """
    template = ('/usr/bin/ssh -a -x %s %s %s'
                ' -o StrictHostKeyChecking=no'
                ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                ' -o Protocol=2 -l %s -p %d')
    values = (self._ssh_verbosity_flag, self._ssh_options, opts, user,
              port)
    return template % values
Fang Deng96667ca2013-08-01 17:46:18 -07001684
1685
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    The tunnel is an `ssh -L` process started through the shell.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forward_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forward_opts),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel.pid)
    return tunnel
1708
1709
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, if known; it
                       is stored with the connection for
                       rpc_disconnect() to use.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop any older connection registered for the same remote port.
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    self._rpc_proxy_map[port] = (command_name, tunnel, remote_pid)
    return self._RPC_PROXY_URL % forwarded_port
1745
1746
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready.  If the readiness call does not succeed, the
    connection is torn down with rpc_disconnect() and the exception
    from the retried call propagates to the caller.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'  An error is raised
        if the server is not ready in time (the exact exception
        type comes from retry.retry -- confirm there).
    @param logfile Logfile to send output when running
        'command' argument.

    @return The xmlrpclib.ServerProxy for the new connection.

    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    # `echo $!` prints the pid of the backgrounded server.
    remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        # The flag lets the finally clause tear the connection down on
        # any failure while still letting the exception propagate.
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        finally:
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001831
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001832
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    Both the tag and the message are shell-quoted, so quotes, `$`,
    backticks and other shell metacharacters are logged literally
    instead of breaking or altering the remote command.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    try:
        from shlex import quote
    except ImportError:
        # Python 2 keeps the same helper in the pipes module.
        from pipes import quote
    self.run('logger -t %s %s' % (quote('%s' % tag),
                                  quote('%s' % message)))
1841
1842
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a remote
    jsonrpc server, as the only clients of this code are factory tests,
    for which the goofy system manager is built in to the image and starts
    when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name is registered: there is no remote server for
    # rpc_disconnect() to kill later.
    rpc_url = self._setup_rpc(port, None)
    proxy = jsonrpclib.jsonrpc.ServerProxy(rpc_url)

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1873
1874
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    The local tunnel is closed and the port is forgotten even when
    stopping the remote server fails; the failure is still raised.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @raise error.TestError if the remote server is still running after
           _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    if port not in self._rpc_proxy_map:
        return
    remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
    try:
        if remote_name:
            # We use 'pkill' to find our target process rather than
            # a PID, because the host may have rebooted since
            # connecting, and we don't want to kill an innocent
            # process with the same PID.
            #
            # 'pkill' helpfully exits with status 1 if no target
            # process is found, for which run() will throw an
            # exception.  We don't want that, so we ignore the
            # status.
            self.run("pkill -f '%s'" % remote_name, ignore_status=True)
            if remote_pid:
                logging.info('Waiting for RPC server "%s" shutdown',
                             remote_name)
                start_time = time.time()
                while (time.time() - start_time <
                       self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                    running_processes = self.run(
                            "pgrep -f '%s'" % remote_name,
                            ignore_status=True).stdout.split()
                    if remote_pid not in running_processes:
                        logging.info('Shut down RPC server.')
                        break
                    time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
                else:
                    # The loop ran out of time without seeing the
                    # server exit.
                    raise error.TestError(
                            'Failed to shutdown RPC server %s' %
                            remote_name)
    finally:
        # Always release the local resources, so a failure on the
        # remote side cannot leak the tunnel process.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)
        del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001929
1930
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports.

    Every port currently recorded in `_rpc_proxy_map` is handed to
    `rpc_disconnect()`.
    """
    # rpc_disconnect() deletes entries from _rpc_proxy_map, so walk a
    # snapshot of the keys rather than the mapping that is being
    # modified underneath us.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001935
1936
def poor_mans_rpc(self, fun):
    """
    Build a shell command that evaluates an expression from client utils.

    The returned command changes into the `bin` directory below the
    installed autotest directory and uses `python -c` to print
    `utils.<fun>`. Nothing is executed here; the caller has to run the
    returned command and read its output.

    @param fun expression in the client utils namespace, for example
               'get_gpu_family()'.
    @return shell command string that prints the value of `fun`.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    pieces = [
        'cd %s/bin; ' % autodir,
        'python -c "import common; import utils;',
        'print utils.%s"' % fun,
    ]
    return ''.join(pieces)
1948
1949
def _ping_check_status(self, status):
    """Ping the host a single time and compare the outcome to `status`.

    @param status Expected ping outcome to compare against.
    @return True iff `status` and the result of ping agree
            (i.e. both True or both False).

    """
    # utils.ping() returning 0 is treated as "the host answered".
    ping_succeeded = utils.ping(self.hostname, tries=1, deadline=1) == 0
    mismatch = status ^ ping_succeeded
    return not mismatch
1960
1961 def _ping_wait_for_status(self, status, timeout):
1962 """Wait for the host to have a given status (UP or DOWN).
1963
1964 Status is checked by polling. Polling will not last longer
1965 than the number of seconds in `timeout`. The polling
1966 interval will be long enough that only approximately
1967 _PING_WAIT_COUNT polling cycles will be executed, subject
1968 to a maximum interval of about one minute.
1969
1970 @param status Waiting will stop immediately if `ping` of the
1971 host returns this status.
1972 @param timeout Poll for at most this many seconds.
1973 @return True iff the host status from `ping` matched the
1974 requested status at the time of return.
1975
1976 """
1977 # _ping_check_status() takes about 1 second, hence the
1978 # "- 1" in the formula below.
1979 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1980 end_time = time.time() + timeout
1981 while time.time() <= end_time:
1982 if self._ping_check_status(status):
1983 return True
1984 if poll_interval > 0:
1985 time.sleep(poll_interval)
1986
1987 # The last thing we did was sleep(poll_interval), so it may
1988 # have been too long since the last `ping`. Check one more
1989 # time, just to be sure.
1990 return self._ping_check_status(status)
1991
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable replacement for `wait_up()`: a host
    that answers ping does not necessarily answer ssh. Only use this
    when the target DUT can be considered functional even though it
    cannot be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002007
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster variant of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002021
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, which
    cannot be told apart from a system that is powered off. To confirm
    that the unit really slept, force a resume afterwards and call
    `test_wait_for_resume()`.

    Intended to be used from a test in a sequence like this:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host sleep;
                         self.SLEEP_TIMEOUT is used when None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    timeout = self.SLEEP_TIMEOUT if sleep_timeout is None else sleep_timeout
    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' % timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002052
2053
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned before
    the host entered sleep mode. If the boot id is different after the
    host comes back, the host rebooted instead of resuming and a
    `TestFail` exception is raised.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host up;
                          self.RESUME_TIMEOUT is used when None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    timeout = (self.RESUME_TIMEOUT if resume_timeout is None
               else resume_timeout)

    if not self.wait_up(timeout=timeout):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                timeout)

    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        # Same boot id: the host resumed without rebooting.
        return
    logging.error('client rebooted (old boot %s, new boot %s)',
                  old_boot_id, new_boot_id)
    raise error.TestFail(
            'client rebooted, but sleep was expected')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002088
2089
def test_wait_for_shutdown(self, shutdown_timeout=None):
    """Wait for the client to shut down.

    "Shut down" is detected by the host no longer answering ping,
    which cannot be told apart from a system that is merely asleep. To
    confirm that the unit was really down, force a boot afterwards and
    call test_wait_for_boot().

    Intended to be used from a test in a sequence like this:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @param shutdown_timeout time limit in seconds to allow the host
                            down; self.SHUTDOWN_TIMEOUT is used when
                            None.
    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    timeout = (self.SHUTDOWN_TIMEOUT if shutdown_timeout is None
               else shutdown_timeout)
    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002119
2120
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned before
    the host was shut down. If the boot id is unchanged once the host
    is back, a `TestFail` exception is raised. The boot id comparison
    is skipped when `old_boot_id` is not given.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_up:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)
    if not old_boot_id:
        # Nothing to compare against; being reachable is enough.
        return
    if self.get_boot_id() == old_boot_id:
        logging.error('client not rebooted (boot %s)',
                      old_boot_id)
        raise error.TestFail(
                'client is back up, but did not reboot')
Simran Basid5e5e272012-09-24 15:23:59 -07002151
2152
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname matches the RPM naming format.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not follow the naming format
            defined for RPM powered DUTs in the lab
            (CrosHost._RPM_HOSTNAME_REGEX). If it does follow the
            format, the regular expression MatchObject is returned
            instead.
    """
    rpm_hostname_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07002164
2165
def has_power(self):
    """Check whether this host's name matches the RPM naming format.

    @return the result of `CrosHost.check_for_rpm_support()` for this
            host's hostname: a MatchObject (truthy) when the hostname
            follows the defined naming format, None otherwise.
    """
    hostname = self.hostname
    return CrosHost.check_for_rpm_support(hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07002173
2174
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08002175 def _set_power(self, state, power_method):
2176 """Sets the power to the host via RPM, Servo or manual.
2177
2178 @param state Specifies which power state to set to DUT
2179 @param power_method Specifies which method of power control to
2180 use. By default "RPM" will be used. Valid values
2181 are the strings "RPM", "manual", "servoj10".
2182
2183 """
2184 ACCEPTABLE_STATES = ['ON', 'OFF']
2185
2186 if state.upper() not in ACCEPTABLE_STATES:
2187 raise error.TestError('State must be one of: %s.'
2188 % (ACCEPTABLE_STATES,))
2189
2190 if power_method == self.POWER_CONTROL_SERVO:
2191 logging.info('Setting servo port J10 to %s', state)
2192 self.servo.set('prtctl3_pwren', state.lower())
2193 time.sleep(self._USB_POWER_TIMEOUT)
2194 elif power_method == self.POWER_CONTROL_MANUAL:
2195 logging.info('You have %d seconds to set the AC power to %s.',
2196 self._POWER_CYCLE_TIMEOUT, state)
2197 time.sleep(self._POWER_CYCLE_TIMEOUT)
2198 else:
2199 if not self.has_power():
2200 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07002201 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
2202 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
2203 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07002204 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002205
2206
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with state 'OFF'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002216
2217
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with state 'ON'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
2227
2228
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the servo and manual methods the cycle is done as power_off(),
    a pause of _POWER_CYCLE_TIMEOUT seconds, then power_on(). For any
    other method a single 'CYCLE' request is sent to the RPM.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return
    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002244
2245
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id reported by
    crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; everything up to the
    # first '.' is the platform id.
    platform_id, _, _ = crossystem.fwid().partition('.')
    # Newer platforms carry a 'Google_' prefix that older ones lack;
    # drop it so both produce the same label.
    return platform_id.lower().replace('google_', '')
2259
2260
def get_architecture(self):
    """Determine the correct architecture label for this host.

    @returns a string representing this host's architecture, as
             reported by crossystem.
    """
    host_crossystem = utils.Crossystem(self)
    host_crossystem.init()
    architecture = host_crossystem.arch()
    return architecture
2269
2270
def get_chrome_version(self):
    """Gets the Chrome version number and milestone as strings.

    Runs client_constants.CHROME_VERSION_COMMAND on the host and hands
    its output to utils.parse_chrome_version().

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
        current Chrome version number as a string (in the form "W.X.Y.Z")
        and "milestone" is the first component of the version number
        (the "W" from "W.X.Y.Z").  If the version number cannot be parsed
        in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
        of "chrome --version" and the milestone will be the empty string.

    """
    result = self.run(client_constants.CHROME_VERSION_COMMAND)
    return utils.parse_chrome_version(result.stdout)
2286
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in
    /etc/lsb-release on the host.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, while
    # development devices report {board_name}-signed-{key_type}. Board
    # names containing 'x86' (e.g. 'x86-...') need their first two
    # dash-separated parts kept; all others only the first.
    parts = board.split('-')
    if 'x86' in board:
        return board_format_string % '-'.join(parts[0:2])
    return board_format_string % parts[0]
Simran Basic6f1f7a2012-10-16 10:47:46 -07002303
2304
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    The value is obtained by running the client utils function
    get_board_with_frequency_and_memory() on the host.

    @returns a more detailed string representing the board. Examples are
    butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    command = self.poor_mans_rpc('get_board_with_frequency_and_memory()')
    output = self.run(command).stdout
    return 'board_freq_mem:%s' % output.strip()
2316
2317
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a lightsensor.

    Searches below _LIGHTSENSOR_SEARCH_DIR on the host for any of the
    file names in _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor or
    None if it does not.
    """
    file_alternatives = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, file_alternatives)
    try:
        # The search follows symlinks. stderr_tee is None because a
        # symlink loop makes find print messages to stderr even though
        # the command still works correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
2337
2338
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Checks for the directory /sys/class/bluetooth/hci0 on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
    None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # 'test' exited with a non-zero status: the directory is missing.
        return None
    # 'test' exited with a return code of 0: the directory exists.
    return 'bluetooth'
2354
2355
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    The value is obtained by running the client utils function
    get_gpu_family() on the host.

    @returns a string representing the gpu family. Examples are mali, tegra,
    pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    command = self.poor_mans_rpc('get_gpu_family()')
    output = self.run(command).stdout
    return 'gpu_family:%s' % output.strip()
2366
2367
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2383
2384
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) don't return any strings, since no tests depend on
    those.
    """
    cmd = 'mosys ec info'
    # For a Chrome OS EC the last field of the output matches our EC
    # version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs look like these instead:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # And some systems don't have ECs at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    version_match = re.search(regexp, ecinfo.stdout)
    if not version_match:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", version_match.group(1))
    return 'ec:cros'
2422
2423
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or none, if no accelerometers.
    """
    # Without the ectool binary there is no way to talk to the EC.
    ectool_lookup = self.run('which ectool', ignore_status=True)
    if ectool_lookup.exit_status:
        logging.info("No ectool cmd found, assuming no EC accelerometers")
        return None

    # The EC has to support the motionsense command.
    motionsense = self.run('ectool motionsense', ignore_status=True)
    if motionsense.exit_status:
        logging.info("EC does not support motionsense command "
                     "assuming no EC accelerometers")
        return None

    # The EC motion sensors have to be active; the first output line
    # is "0" when they are not.
    active_output = self.run('ectool motionsense active').stdout
    first_line = active_output.split('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2454
2455
@label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
             has a Chameleon or None if it has not.
    """
    if not self._chameleon_host:
        return None
    specific_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', specific_label]
2468
2469
@label_decorator('audio_loopback_dongle')
def has_loopback_dongle(self):
    """Determine if an audio loopback dongle is plugged to this host.

    A dongle is assumed when the output of `cras_test_client --dump_s`
    shows both a HEADPHONE and a MIC node plugged.

    @returns 'audio_loopback_dongle' when there is an audio loopback dongle
                                     plugged to this host.
             None                    when there is no audio loopback dongle
                                     plugged to this host.
    """
    server_info = self.run(command='cras_test_client --dump_s',
                           ignore_status=True).stdout
    # Both node types must be plugged; stop at the first missing one.
    for node_type in ('HEADPHONE', 'MIC'):
        if not cras_utils.node_type_is_plugged(node_type, server_info):
            return None
    return 'audio_loopback_dongle'
2486
2487
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when mosys reports the type as 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # Not every platform ships the psu command for mosys. When it
        # is missing the device is assumed to have a battery.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2512
2513
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = '/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or
        # something else. We can assume that MMC is always an internal
        # device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        device_type = self.run(command=type_cmd, ignore_status=True)
        if device_type.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = device_type.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = '/dev/sd[a-z]+'
    # Match against the stripped device path, like the mmc check above;
    # re.match() anchors at the start of the string, so stray leading
    # whitespace in the raw stdout would defeat the match.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2586
2587
@label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    A host with a working servo attached should carry the servo label;
    this is decided by whether `_servo_host` is set.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2598
2599
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect, or
             an empty list when running the tool fails.
    """
    try:
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
    except error.AutoservRunError:
        # The tool is not installed.
        return []
    # Raw string so that '\w' reaches the regex engine unchanged; each
    # "Detected label: <name>" line contributes its <name>.
    return re.findall(r'^Detected label: (\w+)$', result, re.M)
2616
2617
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """ Determine if a board under test is supported for video glitch
    detection tests.

    A board is supported when client/cros/video/device_spec.conf in
    the autotest directory has a section named after the board (the
    board label with ds_constants.BOARD_PREFIX removed).

    @return: 'video_glitch_detection' if board is supported, None otherwise.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
            common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # Something went wrong while parsing the conf file.
        # ConfigParser.Error is the base class of the module's
        # exceptions; the module has no lowercase `error` attribute.
        return None

    supported_boards = parser.sections()
    return 'video_glitch_detection' if dut in supported_boards else None
2640
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    Each device type is looked up with inputcontrol on the host; a
    type counts as present when the command prints anything.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
        depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device_type
            for device_type in ('touchpad', 'touchscreen')
            if self.run(input_cmd % device_type).stdout]
2655
2656
mussa584b4462014-06-20 15:13:28 -07002657
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions
    (the entries of _LABEL_FUNCTIONS).

    A label function may return a single string, a list or tuple of
    strings, or a false value for "no label". A label function that
    raises is logged and otherwise ignored.

    @return list of label strings collected from all label functions.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            # Deliberately broad: one broken detector must not prevent
            # the remaining labels from being collected.
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            label = None
        if not label:
            continue
        # isinstance() rather than an exact type() comparison, so that
        # subclasses of str/list are not silently dropped.
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, (list, tuple)):
            labels.extend(label)
        else:
            logging.warning('Label function %s returned unsupported '
                            'value %r; ignoring it.',
                            label_function.__name__, label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002679
2680
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    The root device is looked up with `rootdev -s -d`, and its
    `removable` flag is read from /sys/block.

    @return: True if DUT is boot from usb, i.e. the root device's
             removable flag is 1.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    block_name = os.path.basename(root_device)
    flag_output = self.run('cat /sys/block/%s/removable' %
                           block_name).stdout
    return int(flag_output.strip()) == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002690
2691
def read_from_meminfo(self, key):
    """Return the memory info from /proc/meminfo

    The entry is looked up with `grep <key> /proc/meminfo` on the host.
    If grep prints several lines, the value of the first one is used.

    @param key: meminfo requested

    @return the memory value as an integer (the number printed after
            the entry name, without its unit)

    """
    meminfo = self.run('grep %s /proc/meminfo' % key).stdout.strip()
    logging.debug('%s', meminfo)
    # Take the number that follows the "<name>:" separator. Searching
    # for the first run of digits anywhere would pick up digits that
    # belong to the entry name itself (e.g. the 4 in "DirectMap4k").
    # Fall back to the first run of digits if there is no such
    # separator in the output.
    match = (re.search(r':\s*(\d+)', meminfo) or
             re.search(r'(\d+)', meminfo))
    return int(match.group(1))