blob: 8acabab2394217267e1be6ee1b63783b5501f2cc [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Gabe Blackb72f4fb2015-01-20 16:47:13 -080024from autotest_lib.client.common_lib.cros.graphite import autotest_es
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
Cheng-Yi Chiangf4104ff2014-12-23 19:39:01 +080028from autotest_lib.client.cros.audio import cras_utils
MK Ryu35d661e2014-09-25 17:44:10 -070029from autotest_lib.server import autoserv_parser
30from autotest_lib.server import autotest
31from autotest_lib.server import constants
32from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070033from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070034from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050035from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070036from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070037from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070038from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080039from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070040from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080041from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070042
43
beeps32a63082013-08-22 14:02:29 -070044try:
45 import jsonrpclib
46except ImportError:
47 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070048
Fang Dengd1c2b732013-08-20 12:59:46 -070049
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
53
54
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label-detecting functions.

    @param label_function_list: List that every decorated function is
                                appended to.
    @param label_list: Optional list that collects the detectable label
                       strings. (Default: None)
    @param label: Label string the decorated function is able to detect.
                  (Default: None)

    @returns A decorator that records the function and hands it back
             unchanged.
    """
    def register(detector):
        """
        @param detector: The function to be recorded as a detector.
        """
        label_function_list.append(detector)
        # The label is only recorded when there is both a label to
        # record and a list to record it in.
        if label_list is None or not label:
            return detector
        label_list.append(label)
        return detector
    return register
73
74
Fang Deng0ca40e22013-08-27 17:47:44 -070075class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070076 """Chromium OS specific subclass of Host."""
77
78 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050079 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070080
Richard Barnette03a0c132012-11-05 12:40:35 -080081 # Timeout values (in seconds) associated with various Chrome OS
82 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070083 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -080084 # In general, a good rule of thumb is that the timeout can be up
85 # to twice the typical measured value on the slowest platform.
86 # The times here have not necessarily been empirically tested to
87 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070088 #
89 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -080090 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
91 # time to restart the netwowrk.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080092 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070093 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -080094 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -080095 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070096 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -080097 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -080098 # network.
beepsf079cfb2013-09-18 17:49:51 -070099 # INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnette84890bd2014-02-21 11:05:47 -0800100 # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
101 # includes powerwash.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700102
103 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800104 RESUME_TIMEOUT = 10
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +0800105 SHUTDOWN_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700106 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700107 USB_BOOT_TIMEOUT = 150
J. Richard Barnette7817b052014-08-28 09:47:29 -0700108 INSTALL_TIMEOUT = 480
Dan Shi2c88eed2013-11-12 10:18:38 -0800109 POWERWASH_BOOT_TIMEOUT = 60
Chris Sosab76e0ee2013-05-22 16:55:41 -0700110
J. Richard Barnette84890bd2014-02-21 11:05:47 -0800111 # REBOOT_TIMEOUT: How long to wait for a reboot.
112 #
Chris Sosab76e0ee2013-05-22 16:55:41 -0700113 # We have a long timeout to ensure we don't flakily fail due to other
114 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
Simran Basi1160e2c2013-10-04 16:00:24 -0700115 # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
116 # return from reboot' bug is solved.
117 REBOOT_TIMEOUT = 480
Chris Sosab76e0ee2013-05-22 16:55:41 -0700118
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800119 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
120 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
121 _USB_POWER_TIMEOUT = 5
122 _POWER_CYCLE_TIMEOUT = 10
123
beeps32a63082013-08-22 14:02:29 -0700124 _RPC_PROXY_URL = 'http://localhost:%d'
Christopher Wileydd181852013-10-10 19:56:58 -0700125 _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
Peter Qiu4410db72014-06-05 10:32:41 -0700126 # Set shutdown timeout to account for the time for restarting the UI.
127 _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800128
Richard Barnette82c35912012-11-20 10:09:10 -0800129 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
130 'rpm_recovery_boards', type=str).split(',')
131
132 _MAX_POWER_CYCLE_ATTEMPTS = 6
133 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
Fang Dengdeba14f2014-11-14 11:54:09 -0800134 _RPM_HOSTNAME_REGEX = ('chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
135 '-host(\d+)')
Gwendal Grignoua66f1d12014-12-03 10:07:26 -0800136 _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
137 "in_illuminance_input",
138 "in_illuminance0_raw",
139 "in_illuminance_raw",
140 "illuminance0_input"]
Richard Barnette82c35912012-11-20 10:09:10 -0800141 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
142 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800143 _DETECTABLE_LABELS = []
144 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
145 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700146
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800147 # Constants used in ping_wait_up() and ping_wait_down().
148 #
149 # _PING_WAIT_COUNT is the approximate number of polling
150 # cycles to use when waiting for a host state change.
151 #
152 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
153 # for arguments to the internal _ping_wait_for_status()
154 # method.
155 _PING_WAIT_COUNT = 40
156 _PING_STATUS_DOWN = False
157 _PING_STATUS_UP = True
158
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800159 # Allowed values for the power_method argument.
160
161 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
162 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
163 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
164 POWER_CONTROL_RPM = 'RPM'
165 POWER_CONTROL_SERVO = 'servoj10'
166 POWER_CONTROL_MANUAL = 'manual'
167
168 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
169 POWER_CONTROL_SERVO,
170 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800171
Simran Basi5e6339a2013-03-21 11:34:32 -0700172 _RPM_OUTLET_CHANGED = 'outlet_changed'
173
Dan Shi9cb0eec2014-06-03 09:04:50 -0700174 # URL pattern to download firmware image.
175 _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
176 'CROS', 'firmware_url_pattern', type=str)
beeps687243d2013-07-18 15:29:27 -0700177
MK Ryu35d661e2014-09-25 17:44:10 -0700178 # File that has a list of directories to be collected
179 _LOGS_TO_COLLECT_FILE = os.path.join(
180 common.client_dir, 'common_lib', 'logs_to_collect')
181
182 # Prefix of logging message w.r.t. crash collection
183 _CRASHLOGS_PREFIX = 'collect_crashlogs'
184
185 # Time duration waiting for host up/down check
186 _CHECK_HOST_UP_TIMEOUT_SECS = 15
187
188 # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
189 # might not be completely done deep through the hardware when the machine
190 # is powered down right after the command returns.
191 # We should wait for a few seconds to make them done. Finger crossed.
192 _SAFE_WAIT_SECS = 10
193
194
J. Richard Barnette964fba02012-10-24 17:34:29 -0700195 @staticmethod
beeps46dadc92013-11-07 14:07:10 -0800196 def check_host(host, timeout=10):
197 """
198 Check if the given host is a chrome-os host.
199
200 @param host: An ssh host representing a device.
201 @param timeout: The timeout for the run command.
202
203 @return: True if the host device is chromeos.
204
beeps46dadc92013-11-07 14:07:10 -0800205 """
206 try:
Christopher Wiley1ea80942014-02-26 16:45:08 -0800207 result = host.run('grep -q CHROMEOS /etc/lsb-release && '
Simran Basie5f7ae42014-06-26 15:44:06 -0700208 '! which adb >/dev/null 2>&1 && '
209 '! grep -q moblab /etc/lsb-release',
Christopher Wileyfc3eac02013-11-21 16:24:57 -0800210 ignore_status=True, timeout=timeout)
beeps46dadc92013-11-07 14:07:10 -0800211 except (error.AutoservRunError, error.AutoservSSHTimeout):
212 return False
213 return result.exit_status == 0
214
215
216 @staticmethod
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800217 def _extract_arguments(args_dict, key_subset):
218 """Extract options from `args_dict` and return a subset result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800219
220 Take the provided dictionary of argument options and return
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800221 a subset that represent standard arguments needed to construct
222 a test-assistant object (chameleon or servo) for a host. The
223 intent is to provide standard argument processing from
224 run_remote_tests for tests that require a test-assistant board
225 to operate.
226
227 @param args_dict Dictionary from which to extract the arguments.
228 @param key_subset Tuple of keys to extract from the args_dict, e.g.
229 ('servo_host', 'servo_port').
230 """
231 result = {}
232 for arg in key_subset:
233 if arg in args_dict:
234 result[arg] = args_dict[arg]
235 return result
236
237
238 @staticmethod
239 def get_chameleon_arguments(args_dict):
240 """Extract chameleon options from `args_dict` and return the result.
241
242 Recommended usage:
243 ~~~~~~~~
244 args_dict = utils.args_to_dict(args)
245 chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
246 host = hosts.create_host(machine, chameleon_args=chameleon_args)
247 ~~~~~~~~
248
249 @param args_dict Dictionary from which to extract the chameleon
250 arguments.
251 """
252 return CrosHost._extract_arguments(
253 args_dict, ('chameleon_host', 'chameleon_port'))
254
255
256 @staticmethod
257 def get_servo_arguments(args_dict):
258 """Extract servo options from `args_dict` and return the result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800259
260 Recommended usage:
261 ~~~~~~~~
262 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700263 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800264 host = hosts.create_host(machine, servo_args=servo_args)
265 ~~~~~~~~
266
267 @param args_dict Dictionary from which to extract the servo
268 arguments.
269 """
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800270 return CrosHost._extract_arguments(
271 args_dict, ('servo_host', 'servo_port'))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700272
J. Richard Barnette964fba02012-10-24 17:34:29 -0700273
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800274 def _initialize(self, hostname, chameleon_args=None, servo_args=None,
Fang Denge545abb2014-12-30 18:43:47 -0800275 try_lab_servo=False, ssh_verbosity_flag='', ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700276 *args, **dargs):
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800277 """Initialize superclasses, |self.chameleon|, and |self.servo|.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700278
Fang Denge545abb2014-12-30 18:43:47 -0800279 This method will attempt to create the test-assistant object
280 (chameleon/servo) when it is needed by the test. Check
281 the docstring of chameleon_host.create_chameleon_host and
282 servo_host.create_servo_host for how this is determined.
Fang Deng5d518f42013-08-02 14:04:32 -0700283
Fang Denge545abb2014-12-30 18:43:47 -0800284 @param hostname: Hostname of the dut.
285 @param chameleon_args: A dictionary that contains args for creating
286 a ChameleonHost. See chameleon_host for details.
287 @param servo_args: A dictionary that contains args for creating
288 a ServoHost object. See servo_host for details.
289 @param try_lab_servo: Boolean, False indicates that ServoHost should
290 not be created for a device in Cros test lab.
291 See servo_host for details.
292 @param ssh_verbosity_flag: String, to pass to the ssh command to control
293 verbosity.
294 @param ssh_options: String, other ssh options to pass to the ssh
295 command.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700296 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700297 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700298 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700299 # self.env is a dictionary of environment variable settings
300 # to be exported for commands run on the host.
301 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
302 # errors that might happen.
303 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700304 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700305 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700306 self._ssh_options = ssh_options
Fang Deng5d518f42013-08-02 14:04:32 -0700307 # TODO(fdeng): We need to simplify the
308 # process of servo and servo_host initialization.
309 # crbug.com/298432
Fang Denge545abb2014-12-30 18:43:47 -0800310 self._servo_host = servo_host.create_servo_host(
311 dut=self.hostname, servo_args=servo_args,
312 try_lab_servo=try_lab_servo)
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800313 # TODO(waihong): Do the simplication on Chameleon too.
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800314 self._chameleon_host = chameleon_host.create_chameleon_host(
315 dut=self.hostname, chameleon_args=chameleon_args)
316
Dan Shi4d478522014-02-14 13:46:32 -0800317 if self._servo_host is not None:
318 self.servo = self._servo_host.get_servo()
319 else:
320 self.servo = None
321
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800322 if self._chameleon_host:
Tom Wai-Hong Tameaee3402014-01-22 08:52:10 +0800323 self.chameleon = self._chameleon_host.create_chameleon_board()
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800324 else:
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800325 self.chameleon = None
Fang Deng5d518f42013-08-02 14:04:32 -0700326
327
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500328 def get_repair_image_name(self):
329 """Generate a image_name from variables in the global config.
330
331 @returns a str of $board-version/$BUILD.
332
333 """
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500334 board = self._get_board_from_afe()
335 if board is None:
336 raise error.AutoservError('DUT has no board attribute, '
337 'cannot be repaired.')
Dan Shi6964fa52014-12-18 11:04:27 -0800338 stable_version = self._AFE.run('get_stable_version', board=board)
339 build_pattern = global_config.global_config.get_config_value(
340 'CROS', 'stable_build_pattern')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500341 return build_pattern % (board, stable_version)
342
343
Scott Zawalski62bacae2013-03-05 10:40:32 -0500344 def _host_in_AFE(self):
345 """Check if the host is an object the AFE knows.
346
347 @returns the host object.
348 """
349 return self._AFE.get_hosts(hostname=self.hostname)
350
351
Chris Sosab76e0ee2013-05-22 16:55:41 -0700352 def lookup_job_repo_url(self):
353 """Looks up the job_repo_url for the host.
354
355 @returns job_repo_url from AFE or None if not found.
356
357 @raises KeyError if the host does not have a job_repo_url
358 """
359 if not self._host_in_AFE():
360 return None
361
362 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700363 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
364 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700365
366
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500367 def clear_cros_version_labels_and_job_repo_url(self):
368 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500369 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400370 return
371
Scott Zawalski62bacae2013-03-05 10:40:32 -0500372 host_list = [self.hostname]
373 labels = self._AFE.get_labels(
374 name__startswith=ds_constants.VERSION_PREFIX,
375 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800376
Scott Zawalski62bacae2013-03-05 10:40:32 -0500377 for label in labels:
378 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500379
beepscb6f1e22013-06-28 19:14:10 -0700380 self.update_job_repo_url(None, None)
381
382
383 def update_job_repo_url(self, devserver_url, image_name):
384 """
385 Updates the job_repo_url host attribute and asserts it's value.
386
387 @param devserver_url: The devserver to use in the job_repo_url.
388 @param image_name: The name of the image to use in the job_repo_url.
389
390 @raises AutoservError: If we failed to update the job_repo_url.
391 """
392 repo_url = None
393 if devserver_url and image_name:
394 repo_url = tools.get_package_url(devserver_url, image_name)
395 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500396 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700397 if self.lookup_job_repo_url() != repo_url:
398 raise error.AutoservError('Failed to update job_repo_url with %s, '
399 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500400
401
Dan Shie9309262013-06-19 22:50:21 -0700402 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400403 """Add cros_version labels and host attribute job_repo_url.
404
405 @param image_name: The name of the image e.g.
406 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700407
Scott Zawalskieadbf702013-03-14 09:23:06 -0400408 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500409 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400410 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500411
Scott Zawalskieadbf702013-03-14 09:23:06 -0400412 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700413 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500414
415 labels = self._AFE.get_labels(name=cros_label)
416 if labels:
417 label = labels[0]
418 else:
419 label = self._AFE.create_label(name=cros_label)
420
421 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700422 self.update_job_repo_url(devserver_url, image_name)
423
424
beepsdae65fd2013-07-26 16:24:41 -0700425 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700426 """
427 Make sure job_repo_url of this host is valid.
428
joychen03eaad92013-06-26 09:55:21 -0700429 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700430 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
431 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
432 download and extract it. If the devserver embedded in the url is
433 unresponsive, update the job_repo_url of the host after staging it on
434 another devserver.
435
436 @param job_repo_url: A url pointing to the devserver where the autotest
437 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700438 @param tag: The tag from the server job, in the format
439 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700440
441 @raises DevServerException: If we could not resolve a devserver.
442 @raises AutoservError: If we're unable to save the new job_repo_url as
443 a result of choosing a new devserver because the old one failed to
444 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700445 @raises urllib2.URLError: If the devserver embedded in job_repo_url
446 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700447 """
448 job_repo_url = self.lookup_job_repo_url()
449 if not job_repo_url:
450 logging.warning('No job repo url set on host %s', self.hostname)
451 return
452
453 logging.info('Verifying job repo url %s', job_repo_url)
454 devserver_url, image_name = tools.get_devserver_build_from_package_url(
455 job_repo_url)
456
beeps0c865032013-07-30 11:37:06 -0700457 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700458
459 logging.info('Staging autotest artifacts for %s on devserver %s',
460 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700461
462 start_time = time.time()
Simran Basi25e7a922014-10-31 11:56:10 -0700463 ds.stage_artifacts(image_name, ['autotest_packages'])
beeps687243d2013-07-18 15:29:27 -0700464 stage_time = time.time() - start_time
465
466 # Record how much of the verification time comes from a devserver
467 # restage. If we're doing things right we should not see multiple
468 # devservers for a given board/build/branch path.
469 try:
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800470 board, build_type, branch = server_utils.ParseBuildName(
beeps687243d2013-07-18 15:29:27 -0700471 image_name)[:3]
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800472 except server_utils.ParseBuildNameException:
beeps687243d2013-07-18 15:29:27 -0700473 pass
474 else:
beeps0c865032013-07-30 11:37:06 -0700475 devserver = devserver_url[
Chris Sosa65425082013-10-16 13:26:22 -0700476 devserver_url.find('/') + 2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700477 stats_key = {
478 'board': board,
479 'build_type': build_type,
480 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700481 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700482 }
Gabe Black1e1c41b2015-02-04 23:55:15 -0800483 autotest_stats.Gauge('verify_job_repo_url').send(
beeps687243d2013-07-18 15:29:27 -0700484 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
485 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700486
Scott Zawalskieadbf702013-03-14 09:23:06 -0400487
Dan Shi0f466e82013-02-22 15:44:58 -0800488 def _try_stateful_update(self, update_url, force_update, updater):
489 """Try to use stateful update to initialize DUT.
490
491 When DUT is already running the same version that machine_install
492 tries to install, stateful update is a much faster way to clean up
493 the DUT for testing, compared to a full reimage. It is implemeted
494 by calling autoupdater.run_update, but skipping updating root, as
495 updating the kernel is time consuming and not necessary.
496
497 @param update_url: url of the image.
498 @param force_update: Set to True to update the image even if the DUT
499 is running the same version.
500 @param updater: ChromiumOSUpdater instance used to update the DUT.
501 @returns: True if the DUT was updated with stateful update.
502
503 """
J. Richard Barnette3f731032014-04-07 17:42:59 -0700504 # TODO(jrbarnette): Yes, I hate this re.match() test case.
505 # It's better than the alternative: see crbug.com/360944.
506 image_name = autoupdater.url_to_image_name(update_url)
507 release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
508 if not re.match(release_pattern, image_name):
509 return False
Dan Shi0f466e82013-02-22 15:44:58 -0800510 if not updater.check_version():
511 return False
512 if not force_update:
513 logging.info('Canceling stateful update because the new and '
514 'old versions are the same.')
515 return False
516 # Following folders should be rebuilt after stateful update.
517 # A test file is used to confirm each folder gets rebuilt after
518 # the stateful update.
519 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
520 test_file = '.test_file_to_be_deleted'
521 for folder in folders_to_check:
522 touch_path = os.path.join(folder, test_file)
523 self.run('touch %s' % touch_path)
524
525 if not updater.run_update(force_update=True, update_root=False):
526 return False
527
528 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700529 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800530 check_file_cmd = 'test -f %s; echo $?'
531 for folder in folders_to_check:
532 test_file_path = os.path.join(folder, test_file)
533 result = self.run(check_file_cmd % test_file_path,
534 ignore_status=True)
535 if result.exit_status == 1:
536 return False
537 return True
538
539
J. Richard Barnette7275b612013-06-04 18:13:11 -0700540 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800541 """After the DUT is updated, confirm machine_install succeeded.
542
543 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700544 @param expected_kernel: kernel expected to be active after reboot,
545 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800546
547 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700548 # Touch the lab machine file to leave a marker that
549 # distinguishes this image from other test images.
550 # Afterwards, we must re-run the autoreboot script because
551 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800552 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800553 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700554 updater.verify_boot_expectations(
555 expected_kernel, rollback_message=
556 'Build %s failed to boot on %s; system rolled back to previous'
557 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700558 # Check that we've got the build we meant to install.
559 if not updater.check_version_to_confirm_install():
560 raise autoupdater.ChromiumOSError(
561 'Failed to update %s to build %s; found build '
562 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700563 updater.update_version,
564 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800565
566
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700567 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400568 """Stage a build on a devserver and return the update_url.
569
570 @param image_name: a name like lumpy-release/R27-3837.0.0
571 @returns an update URL like:
572 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
573 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700574 if not image_name:
575 image_name = self.get_repair_image_name()
576 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400577 devserver = dev_server.ImageServer.resolve(image_name)
578 devserver.trigger_download(image_name, synchronous=False)
579 return tools.image_url_pattern() % (devserver.url(), image_name)
580
581
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700582 def stage_image_for_servo(self, image_name=None):
583 """Stage a build on a devserver and return the update_url.
584
585 @param image_name: a name like lumpy-release/R27-3837.0.0
586 @returns an update URL like:
587 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
588 """
589 if not image_name:
590 image_name = self.get_repair_image_name()
591 logging.info('Staging build for servo install: %s', image_name)
592 devserver = dev_server.ImageServer.resolve(image_name)
593 devserver.stage_artifacts(image_name, ['test_image'])
594 return devserver.get_test_image_url(image_name)
595
596
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory image on a devserver and return its URL.

    The name of the artifact to stage is read from the autotest config
    (section CROS, key factory_artifact).

    @param image_name: a name like <board>/4262.204.0

    @return: A URL built from the factory image URL pattern, eg:
             http://<devserver>/static/canary-channel/<board>/4262.204.0/
             factory_test/chromiumos_factory_image.bin
             None is returned, after logging an error, when image_name
             is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return None

    artifact_name = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not artifact_name:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    artifacts_to_stage = [artifact_name]
    image_server.stage_artifacts(image_name, artifacts_to_stage,
                                 archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
629
630
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False,
                    force_full_update=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install). Both are added back only when an update was
    actually applied.

    When update_url is not given it is resolved in this order:
      1. The image option held by self._parser.options, used directly
         if it starts with 'http://', otherwise staged on a devserver.
      2. The repair image, staged on a devserver, when repair is True.
      3. Otherwise ChromiumOSError is raised.

    Regardless of whether an update was applied, any installed autotest
    directory found on the host is removed at the end.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None and repair is True we will install the
            stable image listed in afe_stable_versions table. If the table
            is not setup, global_config value under CROS.stable_cros_version
            will be used instead.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @param force_full_update: If True, do not attempt to run stateful
            update, force a full reimage. If False, try stateful update
            first when the dut is already installed with the same version.
    @raises autoupdater.ChromiumOSError if no update URL can be resolved,
            or if after a full update the inactive kernel has a lower
            priority than the active one.

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            # No explicit build requested: fall back to the repair image.
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    if not force_full_update:
        try:
            updated = self._try_stateful_update(
                    update_url, force_update, updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as e:
            # A failed stateful update is not fatal: log it and fall
            # through to the full update below ('updated' is still False).
            logging.exception(e)
            logging.warning('Failed to stateful update DUT, force to update.')

    # Stays None unless a full update ran; passed to post-update processing.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('mkdir /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        # Restore the labels/attribute cleared above, now describing the
        # build named by update_url.
        self._post_update_processing(updater, inactive_kernel)
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    logging.debug('Cleaning up old autotest directories.')
    try:
        installed_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + installed_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700753
754
def _clear_fw_version_labels(self):
    """Detach this host from every firmware version label it carries.

    The AFE is queried for labels whose name starts with
    provision.FW_VERSION_PREFIX and that are attached to this host; the
    host is then removed from each of them.
    """
    fw_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    for fw_label in fw_labels:
        fw_label.remove_hosts(hosts=[self.hostname])
762
763
def _add_fw_version_label(self, build):
    """Attach the firmware version label for a build to this host.

    The label name is derived from the build, the label is created if
    needed, and this host is added to the first AFE label whose name
    starts with that label name.

    @param build: Build of firmware.

    """
    label_name = provision.fw_version_to_label(build)
    provision.ensure_label_exists(label_name)
    matching_labels = self._AFE.get_labels(name__startswith=label_name)
    matching_labels[0].add_hosts([self.hostname])
774
775
def firmware_install(self, build=None):
    """Install EC and AP (BIOS) firmware on the DUT through servo.

    The 'firmware' artifact of the build is staged on a devserver, the
    firmware tarball is downloaded and unpacked into a temporary
    directory, and the EC and AP images are programmed via servo. The DUT
    is then reset through the servo power state controller.

    Right before programming starts, the host's fw_version labels are
    removed. After the firmware is updated successfully, a new fw_version
    label for the installed build is added to the host. The temporary
    directory is always cleaned up.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.
                  If not set, repair mode is assumed and the build
                  returned by get_repair_image_name() is used.

    @raises error.TestError if the host does not have a servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    # The AP image file name inside the tarball depends on the platform.
    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else:  # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # The dts files are optional, so a failed extraction is ignored.
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # _add_fw_version_label() requires the build; calling it without
        # an argument raised TypeError after the firmware was flashed.
        self._add_fw_version_label(build)
    finally:
        tmpd.clean()
842
843
def show_update_engine_log(self):
    """Dump the update engine log by running `cat` on it on the host."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    dump_cmd = 'cat %s' % log_path
    self.run(dump_cmd)
Dan Shi10e992b2013-08-30 11:02:59 -0700848
849
def _get_board_from_afe(self):
    """Look up this host's board through its AFE labels.

    Delegates to server_utils.get_board_from_afe(), which looks for a
    host label of the form "board:<board>" and returns the "<board>"
    part. `None` is returned if there is not a single, unique label
    matching the pattern.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800860
861
def get_build(self):
    """Look up the current build of this host in the AFE.

    Delegates to server_utils.get_build_from_afe(), which inspects this
    host's labels in the AFE to determine its build.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800871
872
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500873 def _install_repair(self):
874 """Attempt to repair this host using upate-engine.
875
876 If the host is up, try installing the DUT with a stable
Dan Shi6964fa52014-12-18 11:04:27 -0800877 "repair" version of Chrome OS as defined in afe_stable_versions table.
878 If the table is not setup, global_config value under
879 CROS.stable_cros_version will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500880
Scott Zawalski62bacae2013-03-05 10:40:32 -0500881 @raises AutoservRepairMethodNA if the DUT is not reachable.
882 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500883
884 """
885 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500886 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500887 logging.info('Attempting to reimage machine to repair image.')
888 try:
889 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700890 except autoupdater.ChromiumOSError as e:
891 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500892 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500893 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500894
895
Dan Shi2c88eed2013-11-12 10:18:38 -0800896 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800897 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800898
Dan Shi9cc48452013-11-12 12:39:26 -0800899 update-engine may fail due to a bad image. In such case, powerwash
900 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800901
902 @raises AutoservRepairMethodNA if the DUT is not reachable.
903 @raises ChromiumOSError if the install failed for some reason.
904
905 """
906 if not self.is_up():
907 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
908
909 logging.info('Attempting to powerwash the DUT.')
910 self.run('echo "fast safe" > '
911 '/mnt/stateful_partition/factory_install_reset')
912 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
913 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800914 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800915 'reboot.')
916 raise error.AutoservRepairFailure(
917 'DUT failed to boot from powerwash after %d seconds' %
918 self.POWERWASH_BOOT_TIMEOUT)
919
920 logging.info('Powerwash succeeded.')
921 self._install_repair()
922
923
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """Re-install the OS on the DUT through servo.

    The steps are:
    1) install a test image on a USB storage device attached to the Servo
       board,
    2) boot that image in recovery mode,
    3) install the image with chromeos-install and halt, and
    4) power cycle the DUT through servo and wait for it to come up.

    The USB boot phase and the install phase are each timed with an
    autotest_stats timer.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.

    """
    boot_stat_name = ('servo_install.usb_boot_timeout_%s'
                      % usb_boot_timeout)
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = autotest_stats.Timer(boot_stat_name)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    install_stat_name = ('servo_install.install_timeout_%s'
                         % install_timeout)
    install_timer = autotest_stats.Timer(install_stat_name)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    install_cmd = ('chromeos-install --yes --lab_preserve_logs=%s' %
                   self._LOGS_TO_COLLECT_FILE)
    self.run(install_cmd, timeout=install_timeout)
    self.run('halt')
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.get_power_state_controller().power_off()
    self.servo.switch_usbkey('off')
    # N.B. The Servo API requires that we use power_on() here
    # for two reasons:
    #  1) After turning on a DUT in recovery mode, you must turn
    #     it off and then on with power_on() once more to
    #     disable recovery mode (this is a Parrot specific
    #     requirement).
    #  2) After power_off(), the only way to turn on is with
    #     power_on() (this is a Storm specific requirement).
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
986
987
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700988 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500989 """Reinstall the DUT utilizing servo and a test image.
990
991 Re-install the OS on the DUT by:
992 1) installing a test image on a USB storage device attached to the Servo
993 board,
994 2) booting that image in recovery mode, and then
995 3) installing the image with chromeos-install.
996
Scott Zawalski62bacae2013-03-05 10:40:32 -0500997 @raises AutoservRepairMethodNA if the device does not have servo
998 support.
999
1000 """
1001 if not self.servo:
1002 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
1003 'DUT has no servo support.')
1004
1005 logging.info('Attempting to recovery servo enabled device with '
1006 'servo_repair_reinstall')
1007
J. Richard Barnettee4af8b92013-05-01 13:16:12 -07001008 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001009 self.servo_install(image_url)
1010
1011
1012 def _servo_repair_power(self):
1013 """Attempt to repair DUT using an attached Servo.
1014
1015 Attempt to power on the DUT via power_long_press.
1016
1017 @raises AutoservRepairMethodNA if the device does not have servo
1018 support.
1019 @raises AutoservRepairFailure if the repair fails for any reason.
1020 """
1021 if not self.servo:
1022 raise error.AutoservRepairMethodNA('Repair Power NA: '
1023 'DUT has no servo support.')
1024
1025 logging.info('Attempting to recover servo enabled device by '
1026 'powering it off and on.')
1027 self.servo.get_power_state_controller().power_off()
1028 self.servo.get_power_state_controller().power_on()
1029 if self.wait_up(self.BOOT_TIMEOUT):
1030 return
1031
1032 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001033
1034
Richard Barnette82c35912012-11-20 10:09:10 -08001035 def _powercycle_to_repair(self):
1036 """Utilize the RPM Infrastructure to bring the host back up.
1037
1038 If the host is not up/repaired after the first powercycle we utilize
1039 auto fallback to the last good install by powercycling and rebooting the
1040 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001041
1042 @raises AutoservRepairMethodNA if the device does not support remote
1043 power.
1044 @raises AutoservRepairFailure if the repair fails for any reason.
1045
Richard Barnette82c35912012-11-20 10:09:10 -08001046 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001047 if not self.has_power():
1048 raise error.AutoservRepairMethodNA('Device does not support power.')
1049
Richard Barnette82c35912012-11-20 10:09:10 -08001050 logging.info('Attempting repair via RPM powercycle.')
1051 failed_cycles = 0
1052 self.power_cycle()
1053 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1054 failed_cycles += 1
1055 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001056 raise error.AutoservRepairFailure(
1057 'Powercycled host %s %d times; device did not come back'
1058 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001059 self.power_cycle()
1060 if failed_cycles == 0:
1061 logging.info('Powercycling was successful first time.')
1062 else:
1063 logging.info('Powercycling was successful after %d failures.',
1064 failed_cycles)
1065
1066
MK Ryu35d661e2014-09-25 17:44:10 -07001067 def _reboot_repair(self):
1068 """SSH to this host and reboot."""
1069 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1070 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1071 logging.info('Attempting repair via SSH reboot.')
1072 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1073
1074
def check_device(self):
    """Ping the device over ssh, then clean it up and verify it.

    The three steps run in order; the first one that raises aborts the
    check and its exception propagates to the caller.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    # Reachability first: cleanup and verify both need a working ssh.
    self.ssh_ping()

    self.cleanup()

    self.verify()
1088
1089
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If a servo host is attached but no servo object exists yet, the
    servo host is repaired first (failures are only logged) and the
    servo object is fetched from it.

    The repair procedures below are then tried in order, from less
    intrusive to more intrusive. After each one, crash logs are collected
    if needed and the device is checked with `self.check_device()`. The
    first procedure for which both the repair and the check succeed ends
    the repair; any exception moves on to the next procedure.
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try the auto-update re-install again.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    The outcome of every procedure is recorded in autotest_stats
    counters, both overall and per board when the board is known.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT; the message lists the error of every attempt.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as servo_error:
            logging.error('Could not create a healthy servo: %s',
                          servo_error)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    escalation = [
        self._reboot_repair,
        self._servo_repair_power,
        self._powercycle_to_repair,
        self._install_repair,
        self._install_repair_with_powerwash,
        self._servo_repair_reinstall,
    ]
    errors = []
    board = self._get_board_from_afe()

    def bump_counters(overall_fmt, per_board_fmt, func_name):
        """Increment the overall counter and, if known, the board one."""
        autotest_stats.Counter(overall_fmt % func_name).increment()
        if board:
            autotest_stats.Counter(
                    per_board_fmt % (func_name, board)).increment()

    for attempt in escalation:
        try:
            attempt()
            self.try_collect_crashlogs()
            self.check_device()
            bump_counters('%s.SUCCEEDED', '%s.%s.SUCCEEDED',
                          attempt.__name__)
            return
        except error.AutoservRepairMethodNA as e:
            bump_counters('%s.RepairNA', '%s.%s.RepairNA',
                          attempt.__name__)
            logging.warning('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            bump_counters('%s.FAILED', '%s.%s.FAILED', attempt.__name__)
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    autotest_stats.Counter('Full_Repair_Failed').increment()
    if board:
        autotest_stats.Counter(
                'Full_Repair_Failed.%s' % board).increment()
    failure_report = '\n'.join(errors)
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            failure_report)
Richard Barnette82c35912012-11-20 10:09:10 -08001189
1190
def try_collect_crashlogs(self, check_host_up=True):
    """Collect crash logs from the host if they are needed.

    Logs are collected when the host has a pending crashed job and,
    unless check_host_up is False, the host is up. Any exception raised
    on the way is logged and suppressed.

    @param check_host_up: Flag for checking host is up. Default is True.
    """
    try:
        pending_job = self._need_crash_logs()
        if not pending_job:
            return

        logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
                      pending_job)
        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return

        self._collect_crashlogs(pending_job)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', self._CRASHLOGS_PREFIX,
                      pending_job)
    except Exception as collect_error:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, collect_error)
1214
1215
def _need_crash_logs(self):
    """Read the need_crash_logs attribute of this host from the AFE.

    At most one such attribute is expected per host; this is enforced
    with an assertion.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    matches = self._AFE.get_host_attribute(
            constants.CRASHLOGS_HOST_ATTRIBUTE, hostname=self.hostname)
    assert len(matches) < 2
    if not matches:
        return None
    return matches[0].value
1226
1227
def _collect_crashlogs(self, job_id):
    """Grab logs from the host where a job was crashed.

    If PRIOR_LOGS_DIR exists on the host, the prior logs are collected.
    Otherwise, if the lab-machine marker (_LAB_MACHINE_FILE) exists on
    the host, the host was repaired automatically and the normal system
    logs are collected. If neither exists, collection is skipped with a
    warning.

    In every case a marker file holding the job id is written to the
    crash-logs directory and the need_crash_logs attribute is removed
    from the host, so collection is attempted only once.

    @param job_id: Id of the job that was crashed.
    """
    dest_dir = crashcollect.get_crashinfo_dir(
            self, constants.CRASHLOGS_DEST_DIR_PREFIX)

    found_prior_logs = self.path_exists(client_constants.PRIOR_LOGS_DIR)
    if found_prior_logs:
        self._collect_prior_logs(dest_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(dest_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # We make crash collection be one-time effort.
    # _collect_prior_logs() and _collect_system_logs() will not throw
    # any exception, and following codes will be executed even when
    # those methods fail.
    # _collect_crashlogs() is called only when the host is up (refer
    # to try_collect_crashlogs()). We assume _collect_prior_logs() and
    # _collect_system_logs() fail rarely when the host is up.
    # In addition, it is not clear how many times we should try crash
    # collection again while not triggering next repair unnecessarily.
    # Therefore, we try crash collection one time.

    # Create a marker file as soon as log collection is done.
    # Leave the job id to this marker for gs_offloader to consume.
    marker_path = os.path.join(dest_dir, constants.CRASHLOGS_MARKER)
    with open(marker_path, 'a') as marker:
        marker.write('%s\n' % job_id)

    # Remove need_crash_logs attribute
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if not found_prior_logs:
        return

    logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                  client_constants.PRIOR_LOGS_DIR, self.hostname)
    cleanup_cmd = 'rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR
    self.run(cleanup_cmd)
    # Wait for a few seconds to make sure the prior command is
    # done deep through storage.
    time.sleep(self._SAFE_WAIT_SECS)
1284
1285
def _collect_prior_logs(self, crashlogs_dir):
    """Grab prior logs that were stashed before re-installing a host.

    Failures are logged and never propagated to the caller.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prior_dir = client_constants.PRIOR_LOGS_DIR
    logging.debug('%s: Found %s, collecting them...',
                  self._CRASHLOGS_PREFIX, prior_dir)
    try:
        self.collect_logs(prior_dir, crashlogs_dir, False)
    except Exception as err:
        logging.error('%s: Failed to collect %s: %s',
                      self._CRASHLOGS_PREFIX, prior_dir, err)
        return
    try:
        logging.debug('%s: %s is collected',
                      self._CRASHLOGS_PREFIX, prior_dir)
    except Exception as err:
        logging.error('%s: Failed to collect %s: %s',
                      self._CRASHLOGS_PREFIX, prior_dir, err)
1302
1303
def _collect_system_logs(self, crashlogs_dir):
    """Grab normal system logs from a host.

    The paths to collect are read from _LOGS_TO_COLLECT_FILE. Each path
    is handled independently: one that does not exist on the host is
    skipped, and a failure on one path is logged without stopping the
    rest.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix_attr = '_CRASHLOGS_PREFIX'
    logging.debug('%s: Found %s, collecting system logs...',
                  getattr(self, prefix_attr), self._LAB_MACHINE_FILE)
    for log_path in server_utils.parse_simple_config(
            self._LOGS_TO_COLLECT_FILE):
        try:
            if not self.path_exists(log_path):
                continue
            logging.debug('%s: Collecting %s...',
                          self._CRASHLOGS_PREFIX, log_path)
            target = server_utils.concat_path_except_last(
                    crashlogs_dir, log_path)
            self.collect_logs(log_path, target, False)
            logging.debug('%s: %s is collected',
                          self._CRASHLOGS_PREFIX, log_path)
        except Exception as err:
            logging.error('%s: Failed to collect %s: %s',
                          self._CRASHLOGS_PREFIX, log_path, err)
1325
1326
def close(self):
    """Disconnect all known RPC proxies, then run the parent close()."""
    # Tear down RPC tunnels first, while the host object is still usable.
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001330
1331
def get_power_supply_info(self):
    """Get the output of power_supply_info.

    power_supply_info outputs the info of each power supply, e.g.,
    Device: Line Power
      online: no
      type: Mains
      voltage (V): 0
      current (A): 0
    Device: Battery
      state: Discharging
      percentage: 95.9276
      technology: Li-ion

    Above output shows two devices, Line Power and Battery, with details
    of each device listed. This function parses the output into a
    dictionary, with key being the device name, and value being a
    dictionary of details of the device info.

    Each line is split on its first ':' only, so a value that itself
    contains ':' is kept intact. Lines without ':' and details that
    appear before the first named device are ignored. If a device name
    is repeated, the last section with that name wins.

    @return: The dictionary of power_supply_info, e.g.,
             {'Line Power': {'online': 'yes', 'type': 'Mains'},
              'Battery': {'vendor': 'xyz', 'percentage': '100'}}
    @raise error.AutoservRunError if power_supply_info tool is not found
           in the DUT. Caller should handle this error to avoid false
           failure on verification.
    """
    output = self.run('power_supply_info').stdout.strip()
    info = {}
    # Details of the device section being parsed; None until a named
    # 'Device:' header has been seen.
    current = None
    for line in output.splitlines():
        key, separator, value = line.partition(':')
        if not separator:
            continue
        key = key.strip()
        value = value.strip()
        if key == 'Device':
            if value:
                current = {}
                info[value] = current
            else:
                # A header without a name opens no section.
                current = None
        elif current is not None:
            current[key] = value
    return info
1376
1377
def get_battery_percentage(self):
    """Read the battery charge level from power_supply_info.

    @return: The percentage of battery level as a float, value range
             from 0-100. None if the battery info cannot be retrieved.
    """
    try:
        supply_info = self.get_power_supply_info()
        logging.info(supply_info)
        battery = supply_info['Battery']
        percentage = battery['percentage']
        return float(percentage)
    except (KeyError, ValueError, error.AutoservRunError):
        return None
1390
1391
def is_ac_connected(self):
    """Check if the dut has power adapter connected and charging.

    @return: True if the 'Line Power' device reports online 'yes',
             False if it reports anything else, None if the info
             cannot be retrieved.
    """
    try:
        line_power = self.get_power_supply_info()['Line Power']
        online = line_power['online']
    except (KeyError, error.AutoservRunError):
        return None
    return online == 'yes'
Dan Shi49ca0932014-11-14 11:22:27 -08001402
1403
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE record of this host carries the
    _RPM_OUTLET_CHANGED attribute. Otherwise the host is powered on and
    the attribute is cleared.

    If powering on fails, the failure is logged and posted to
    autotest_es. The exception is then re-raised only when the battery
    level is known and below 50%. When the power adapter is reported
    connected, the attribute is cleared as well.

    @raises rpm_client.RemotePowerException: if power-on failed and the
            battery level is known to be below 50%.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts or self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return
    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
        afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                               hostname=self.hostname)
    except rpm_client.RemotePowerException:
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
        autotest_es.post(
                type_str='RPM_poweron_failure',
                metadata={'hostname': self.hostname})

        battery_percentage = self.get_battery_percentage()
        # Compare against None explicitly: a reading of 0.0 is a valid
        # (and critically low) level, not a missing one.
        if battery_percentage is not None and battery_percentage < 50:
            raise
        elif self.is_ac_connected():
            logging.info('The device has power adapter connected and '
                         'charging. No need to try to turn RPM on '
                         'again.')
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        logging.info('Battery level is now at %s%%. The device may '
                     'still have enough power to run test, so no '
                     'exception will be raised.', battery_percentage)
1436
Simran Basi5e6339a2013-03-21 11:34:32 -07001437
beepsc87ff602013-07-31 21:53:00 -07001438 def _is_factory_image(self):
1439 """Checks if the image on the DUT is a factory image.
1440
1441 @return: True if the image on the DUT is a factory image.
1442 False otherwise.
1443 """
1444 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1445 return result.exit_status == 0
1446
1447
def _restart_ui(self):
    """Restart the Chrome UI and wait for Chrome to be ready again.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    on_factory_image = self._is_factory_image()
    if on_factory_image:
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette): The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).
    #
    # The login prompt state is sampled before the restart and handed
    # to the wait call afterwards.
    state_before_restart = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(state_before_restart, self)
beepsc87ff602013-07-31 21:53:00 -07001467
1468
def cleanup(self):
    """Clean up the host after a job.

    Removes CLEANUP_LOGS_PAUSED_FILE and restarts the UI. If the UI
    cannot be restarted, falls back to the parent class cleanup().
    Finally, if the host has power control, makes sure power is
    restored.
    """
    self.run('rm -f %s' % client_constants.CLEANUP_LOGS_PAUSED_FILE)
    restart_failures = (error.AutotestRunError, error.AutoservRunError,
                        FactoryImageCheckerException)
    try:
        self._restart_ui()
    except restart_failures:
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()
    # Check if the rpm outlet was manipulated.
    if not self.has_power():
        return
    self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001482
1483
def reboot(self, **dargs):
    """
    Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    Unless the caller supplies them, a 'reboot_cmd' (a normal reboot
    followed by a forced one after 'reboot_timeout' seconds, default 10)
    and 'fastsync'=True are filled in. 'board' is always set from
    get_board() with everything up to the first ':' removed.

    @param dargs: Keyword arguments forwarded to the parent reboot().
    """
    if 'reboot_cmd' not in dargs:
        force_after_secs = dargs.get('reboot_timeout', 10)
        reboot_template = ('((reboot & sleep %d; reboot -f &)'
                           ' </dev/null >/dev/null 2>&1 &)')
        dargs['reboot_cmd'] = reboot_template % force_after_secs
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times:
    # Get the board name i.e. 'daisy_spring'
    board_label = self.get_board()

    # Strip the prefix and add it to dargs.
    dargs['board'] = board_label.split(':', 1)[-1]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001508
1509
def suspend(self, **dargs):
    """
    Suspend the site host.

    'timeout' is always set to the suspend time ('suspend_time',
    default 60). Unless the caller supplies 'suspend_cmd', a command is
    built that resets the RTC wake alarm, sets it to fire after the
    suspend time, and asks powerd to suspend.

    @param dargs: Keyword arguments forwarded to the parent suspend().
    """
    wake_after_secs = dargs.get('suspend_time', 60)
    dargs['timeout'] = wake_after_secs
    if 'suspend_cmd' not in dargs:
        steps = [
            'echo 0 > /sys/class/rtc/rtc0/wakealarm',
            'echo +%d > /sys/class/rtc/rtc0/wakealarm' % wake_after_secs,
            'powerd_dbus_suspend --delay=0 &',
        ]
        chained = ' && '.join(steps)
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' % chained)
    super(CrosHost, self).suspend(**dargs)
1523
1524
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    # grep exits non-zero when nothing matches; ignore the status so a
    # stopped service yields False instead of an exception from run().
    result = self.run('status %s | grep start/running' % service_name,
                      ignore_status=True)
    return result.stdout.strip() != ''
1534
1535
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. No crash-logs are waiting to be collected from this host.
     2. All conditions tested by the parent version of this
        function.
     3. Sufficient inodes and space in /mnt/stateful_partition.
     4. Sufficient space in /mnt/stateful_partition/encrypted, when
        that path exists.
     5. The system-services upstart job is running.
     6. update_engine answers a simple status request (skipped on
        factory images).
     7. python on the DUT can import cPickle.

    @raises error.AutoservCrashLogCollectRequired: if crash-logs
            must be collected first.
    @raises error.AutoservError: if system-services is not running.
    """
    # Check if a job was crashed on this host.
    # If yes, avoid verification until crash-logs are collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()

    stateful_path = '/mnt/stateful_partition'
    config_value = global_config.global_config.get_config_value
    self.check_inodes(
            stateful_path,
            config_value('SERVER', 'kilo_inodes_required', type=int,
                         default=100))
    self.check_diskspace(
            stateful_path,
            config_value('SERVER', 'gb_diskspace_required', type=float,
                         default=20.0))

    # Not all targets build with encrypted stateful support.
    encrypted_path = '/mnt/stateful_partition/encrypted'
    if self.path_exists(encrypted_path):
        self.check_diskspace(
                encrypted_path,
                config_value('SERVER', 'gb_encrypted_diskspace_required',
                             type=float, default=0.1))

    services_running = self.upstart_status('system-services')
    if not services_running:
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001585
1586
def verify_hardware(self):
    """Verify hardware system of a Chrome OS system.

    Logs the following hardware conditions; nothing is raised here:
    1. Battery level.
    2. Is power adapter connected.
    """
    logging.info('Battery percentage: %s', self.get_battery_percentage())
    # Query the adapter state once: each call runs a command on the DUT,
    # and two reads could disagree with each other.
    ac_connected = self.is_ac_connected()
    if ac_connected is None:
        logging.info('Can not determine if the device has power adapter '
                     'connected.')
    else:
        logging.info('Device %s power adapter connected and charging.',
                     'has' if ac_connected else 'does not have')
Dan Shi49ca0932014-11-14 11:22:27 -08001601
1602
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line as a string, without a target host.
    """
    tuned_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    pieces = ['/usr/bin/ssh -a -x %s %s %s' % (
            self._ssh_verbosity_flag, self._ssh_options, opts)]
    pieces.extend('-o %s' % option for option in tuned_options)
    pieces.append('-l %s -p %d' % (user, port))
    return ' '.join(pieces)
Fang Deng96667ca2013-08-01 17:46:18 -07001643
1644
def _create_ssh_tunnel(self, port, local_port):
    """Start an ssh process forwarding local_port to port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    full_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding),
                          self.hostname)
    logging.debug('Full tunnel command: %s', full_cmd)
    process = subprocess.Popen(full_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, process.pid)
    return process
1667
1668
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, recorded
                       alongside the tunnel for rpc_disconnect().

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop any earlier connection to the same remote port first.
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    bookkeeping = (command_name, tunnel, remote_pid)
    self._rpc_proxy_map[port] = bookkeeping
    return self._RPC_PROXY_URL % forwarded_port
1704
1705
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready. If the server isn't ready by timeout_seconds,
    the connection is torn down via rpc_disconnect and the error
    from the retried call propagates to the caller.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'  An error is raised
        if the server is not ready in time.
        NOTE(review): the exact exception type comes from
        retry.retry, which is not visible here -- confirm.
    @param logfile Logfile to send output when running
        'command' argument.

    @return The xmlrpclib.ServerProxy for the started server.
    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    # 'echo $!' prints the pid of the backgrounded server, which is
    # read back from stdout below.
    remote_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        # Tracks whether ready_test() returned, so the finally clause
        # can tell a clean exit from an exception passing through.
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        finally:
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001790
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001791
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    Both values are shell-quoted before being placed on the `logger`
    command line, so quotes, `$`, backticks and similar characters in
    the message are logged literally.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    # shlex.quote on Python 3; pipes.quote is the Python 2 equivalent.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote
    self.run('logger -t %s %s' % (quote('%s' % tag),
                                  quote('%s' % message)))
1800
1801
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a remote
    jsonrpc server, as the only clients of this code are factory tests,
    for which the goofy system manager is built in to the image and starts
    when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if jsonrpclib:
        # No command name is recorded: the remote server is not ours
        # to terminate on disconnect.
        rpc_url = self._setup_rpc(port, None)
        client = jsonrpclib.jsonrpc.ServerProxy(rpc_url)

        logging.info('Established a jsonrpc connection through port %s.',
                     port)
        return client

    logging.warning('Jsonrpclib could not be imported. Check that '
                    'site-packages contains jsonrpclib.')
    return None
1832
1833
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @raises error.TestError if the remote server is still running
            after _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    try:
        server_name, tunnel, server_pid = self._rpc_proxy_map[port]
    except KeyError:
        return

    if server_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % server_name, ignore_status=True)
        if server_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         server_name)
            began = time.time()
            while (time.time() - began <
                   self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                survivors = self.run(
                        "pgrep -f '%s'" % server_name,
                        ignore_status=True).stdout.split()
                if server_pid in survivors:
                    # Still alive; poll again after a short pause.
                    time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
                    continue
                logging.info('Shut down RPC server.')
                break
            else:
                raise error.TestError('Failed to shutdown RPC server %s' %
                                      server_name)

    if tunnel.poll() is not None:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel.pid, tunnel.returncode)
    else:
        tunnel.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel.pid)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001888
1889
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() deletes entries from the map, so iterate over a
    # snapshot of the ports rather than over the live dictionary.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001894
1895
def poor_mans_rpc(self, fun):
    """
    Build a shell command that calls a client utils function on the host.

    The command changes into the bin directory of the installed
    autotest client and prints the result of utils.<fun> using python.
    This method only builds the command; it does not run it.

    @param fun function in client utils namespace.
    @return the shell command string that calls fun and prints its
            result.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    change_dir = 'cd %s/bin; ' % autodir
    invoke = 'python -c "import common; import utils;'
    show = 'print utils.%s"' % fun
    return ''.join((change_dir, invoke, show))
1907
1908
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    # utils.ping() result 0 is treated as "host answered".
    host_answered = utils.ping(self.hostname, tries=1, deadline=1) == 0
    return status == host_answered
1919
1920 def _ping_wait_for_status(self, status, timeout):
1921 """Wait for the host to have a given status (UP or DOWN).
1922
1923 Status is checked by polling. Polling will not last longer
1924 than the number of seconds in `timeout`. The polling
1925 interval will be long enough that only approximately
1926 _PING_WAIT_COUNT polling cycles will be executed, subject
1927 to a maximum interval of about one minute.
1928
1929 @param status Waiting will stop immediately if `ping` of the
1930 host returns this status.
1931 @param timeout Poll for at most this many seconds.
1932 @return True iff the host status from `ping` matched the
1933 requested status at the time of return.
1934
1935 """
1936 # _ping_check_status() takes about 1 second, hence the
1937 # "- 1" in the formula below.
1938 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1939 end_time = time.time() + timeout
1940 while time.time() <= end_time:
1941 if self._ping_check_status(status):
1942 return True
1943 if poll_interval > 0:
1944 time.sleep(poll_interval)
1945
1946 # The last thing we did was sleep(poll_interval), so it may
1947 # have been too long since the last `ping`. Check one more
1948 # time, just to be sure.
1949 return self._ping_check_status(status)
1950
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable replacement for `wait_up()`: a host
    that answers ping does not necessarily answer ssh. Use it only
    when the target DUT may be considered functional even though it
    cannot be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001966
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster version of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001980
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, which
    looks the same as a system that is powered off. To confirm that
    the unit really was asleep, force a resume and then call
    `test_wait_for_resume()`.

    A test is expected to use this function in a sequence like:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host sleep;
                         SLEEP_TIMEOUT is used when None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    if sleep_timeout is None:
        sleep_timeout = self.SLEEP_TIMEOUT

    went_down = self.ping_wait_down(timeout=sleep_timeout)
    if went_down:
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' % sleep_timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002011
2012
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned before
    the host entered sleep mode. If the boot id is different after
    the host comes back, a `TestFail` exception is raised.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host up;
                          RESUME_TIMEOUT is used when None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if resume_timeout is None:
        resume_timeout = self.RESUME_TIMEOUT

    came_back = self.wait_up(timeout=resume_timeout)
    if not came_back:
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
            resume_timeout)

    # A changed boot id means the host rebooted instead of resuming.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        logging.error('client rebooted (old boot %s, new boot %s)',
                      old_boot_id, current_boot_id)
        raise error.TestFail(
            'client rebooted, but sleep was expected')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002047
2048
def test_wait_for_shutdown(self, shutdown_timeout=None):
    """Wait for the client to shut down.

    "Has shut down" is detected by the host no longer answering ping,
    which looks the same as a system that is merely asleep. To
    confirm that the unit was down, force a boot and then call
    test_wait_for_boot().

    A test is expected to use this function in a sequence like:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @param shutdown_timeout time limit in seconds to allow the host
                            down; SHUTDOWN_TIMEOUT is used when None.
    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    if shutdown_timeout is None:
        shutdown_timeout = self.SHUTDOWN_TIMEOUT

    went_down = self.ping_wait_down(timeout=shutdown_timeout)
    if went_down:
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' %
        shutdown_timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002078
2079
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned before
    the shut down. If the boot id is unchanged once the host is back,
    a `TestFail` exception is raised. When `old_boot_id` is not
    given, the boot id test is left out.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
            'client failed to reboot after %d seconds' %
            self.REBOOT_TIMEOUT)

    # Only compare boot ids when the caller supplied one.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        logging.error('client not rebooted (boot %s)',
                      old_boot_id)
        raise error.TestFail(
            'client is back up, but did not reboot')
Simran Basid5e5e272012-09-24 15:23:59 -07002110
2111
@staticmethod
def check_for_rpm_support(hostname):
    """Check a hostname against the RPM naming format.

    The hostname is matched from its start against
    `CrosHost._RPM_HOSTNAME_REGEX`.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not follow the defined naming
            format for RPM powered DUT's in the lab. If it does follow
            the format, a regular expression MatchObject is returned.
    """
    rpm_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07002123
2124
def has_power(self):
    """Report whether this host's name follows the RPM naming format.

    @return The result of `check_for_rpm_support()` for this host's
            hostname: a regular expression MatchObject (truthy) when
            the name follows the format, None otherwise.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07002132
2133
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08002134 def _set_power(self, state, power_method):
2135 """Sets the power to the host via RPM, Servo or manual.
2136
2137 @param state Specifies which power state to set to DUT
2138 @param power_method Specifies which method of power control to
2139 use. By default "RPM" will be used. Valid values
2140 are the strings "RPM", "manual", "servoj10".
2141
2142 """
2143 ACCEPTABLE_STATES = ['ON', 'OFF']
2144
2145 if state.upper() not in ACCEPTABLE_STATES:
2146 raise error.TestError('State must be one of: %s.'
2147 % (ACCEPTABLE_STATES,))
2148
2149 if power_method == self.POWER_CONTROL_SERVO:
2150 logging.info('Setting servo port J10 to %s', state)
2151 self.servo.set('prtctl3_pwren', state.lower())
2152 time.sleep(self._USB_POWER_TIMEOUT)
2153 elif power_method == self.POWER_CONTROL_MANUAL:
2154 logging.info('You have %d seconds to set the AC power to %s.',
2155 self._POWER_CYCLE_TIMEOUT, state)
2156 time.sleep(self._POWER_CYCLE_TIMEOUT)
2157 else:
2158 if not self.has_power():
2159 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07002160 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
2161 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
2162 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07002163 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002164
2165
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with the state 'OFF'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='OFF', power_method=power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002175
2176
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with the state 'ON'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='ON', power_method=power_method)
2186
2187
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the servo and manual methods the host is powered off, then
    powered on again after _POWER_CYCLE_TIMEOUT seconds. Any other
    method sends a single 'CYCLE' request through rpm_client.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002203
2204
def get_platform(self):
    """Determine the correct platform label for this host.

    The label is derived from the firmware id reported by crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # The fwid generally has the form {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the part before
    # the first '.' is used as the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.')[0].lower()
    # Newer platforms carry a 'Google_' prefix, older ones do not.
    return platform_id.replace('google_', '')
2218
2219
def get_architecture(self):
    """Determine the correct architecture label for this host.

    @returns a string representing this host's architecture, as
             reported by crossystem.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    architecture = crossystem.arch()
    return architecture
2228
2229
def get_chrome_version(self):
    """Gets the Chrome version number and milestone as strings.

    Runs client_constants.CHROME_VERSION_COMMAND on the host and hands
    its output to utils.parse_chrome_version().

    @return The result of utils.parse_chrome_version(); documented as a
            tuple (chrome_ver, milestone) where "chrome_ver" is the
            current Chrome version number as a string (in the form
            "W.X.Y.Z") and "milestone" is the first component of the
            version number (the "W" from "W.X.Y.Z"). If the version
            number cannot be parsed in the "W.X.Y.Z" format, the
            "chrome_ver" will be the full output of the command and the
            milestone will be the empty string.

    """
    result = self.run(client_constants.CHROME_VERSION_COMMAND)
    return utils.parse_chrome_version(result.stdout)
2245
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board is read from CHROMEOS_RELEASE_BOARD in /etc/lsb-release
    and prefixed with ds_constants.BOARD_PREFIX.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    # Lab devices generally report the plain board name, but development
    # devices report {board_name}-signed-{key_type}. A leading 'x86-'
    # belongs to the board name and has to be kept, so such names keep
    # two dash-separated components instead of one.
    components = board.split('-')
    keep = 2 if 'x86' in board else 1
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    return board_format_string % '-'.join(components[:keep])
Simran Basic6f1f7a2012-10-16 10:47:46 -07002262
2263
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    Runs the client utils function
    get_board_with_frequency_and_memory() on the host through
    `poor_mans_rpc()` and uses its output.

    @returns a more detailed string representing the board, prefixed
             with 'board_freq_mem:'. Examples of the board part are
             butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    command = self.poor_mans_rpc('get_board_with_frequency_and_memory()')
    board = self.run(command).stdout
    # Call strip() on the value itself; the unbound str.strip(board)
    # form raises TypeError when the output is not exactly a str.
    return 'board_freq_mem:%s' % board.strip()
2275
2276
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a lightsensor.

    Searches below _LIGHTSENSOR_SEARCH_DIR on the host for any of the
    file names in _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    name_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        search_dir, name_pattern)
    try:
        # The search follows symlinks. stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still works.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1: none of the possible
        # lightsensor files exist.
        return None
    return 'lightsensor'
2296
2297
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1: the directory does not
        # exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
2313
2314
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    Runs the client utils function get_gpu_family() on the host through
    `poor_mans_rpc()` and uses its output.

    @returns a string representing the gpu family, prefixed with
             'gpu_family:'. Examples of the family part are mali, tegra,
             pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    gpu_family = self.run(self.poor_mans_rpc('get_gpu_family()')).stdout
    # Call strip() on the value itself; the unbound str.strip(x) form
    # raises TypeError when the output is not exactly a str.
    return 'gpu_family:%s' % gpu_family.strip()
2325
2326
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    @returns a string representing this host's graphics. For now a host
    whose `uname -a` output contains 'arm' returns graphics:gles while
    all others return graphics:gl. This may change over time, but for
    robustness reasons this should avoid executing code in actual
    graphics libraries (which may not be ready and is tested by
    graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2342
2343
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) return None, since no tests depend on those.
    """
    cmd = 'mosys ec info'
    # For a Chrome OS EC the last field of the output matches our EC
    # version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs look like these instead:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # Some systems have no EC at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    version_match = re.search(regexp, ecinfo.stdout)
    if not version_match:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", version_match.group(1))
    return 'ec:cros'
2381
2382
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or None, if no accelerometers.
    """
    # ectool has to be available on the host
    if self.run('which ectool', ignore_status=True).exit_status:
        logging.info("No ectool cmd found, assuming no EC accelerometers")
        return None

    # The EC has to support the motionsense command
    if self.run('ectool motionsense', ignore_status=True).exit_status:
        logging.info("EC does not support motionsense command "
                     "assuming no EC accelerometers")
        return None

    # The EC motion sensors have to be active; the first output line
    # is "0" when they are not.
    first_line = self.run('ectool motionsense active').stdout.split('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2413
2414
@label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
             has a Chameleon or None if it has not.
    """
    if not self._chameleon_host:
        return None
    specific_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', specific_label]
2427
2428
@label_decorator('audio_loopback_dongle')
def has_loopback_dongle(self):
    """Determine if an audio loopback dongle is plugged to this host.

    The dongle counts as plugged when the `cras_test_client --dump_s`
    output shows both a HEADPHONE and a MIC node as plugged.

    @returns 'audio_loopback_dongle' when there is an audio loopback dongle
                                     plugged to this host.
             None                    when there is no audio loopback dongle
                                     plugged to this host.
    """
    server_info = self.run(command='cras_test_client --dump_s',
                           ignore_status=True).stdout
    for node_type in ('HEADPHONE', 'MIC'):
        if not cras_utils.node_type_is_plugged(node_type, server_info):
            return None
    return 'audio_loopback_dongle'
2445
2446
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when `mosys psu type` reports 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # Not every platform ships the psu command for mosys. When the
        # command fails, the device is assumed to have a battery.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2471
2472
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = r'/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or
        # something else. We can assume that MMC is always an internal
        # device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Named device_type so the builtin type() is not shadowed.
        device_type = self.run(command=type_cmd, ignore_status=True)
        if device_type.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None

        if device_type.stdout.strip() == 'MMC':
            return 'storage:mmc'

    scsi_pattern = r'/dev/sd[a-z]+'
    # Match the stripped device path, the same value every other check
    # in this method uses.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        if 'usb' in link.stdout.strip():
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None

        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate.stdout.strip())

    # All other internal device / error case will always fall here
    return None
2545
2546
@label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    The decision is based on whether `_servo_host` is set (truthy).

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2557
2558
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect; an
             empty list when running the commands fails.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
    except error.AutoservRunError:
        # The tool is not installed.
        return []
    # Raw string: '\w' in a plain string literal is an invalid escape.
    return re.findall(r'^Detected label: (\w+)$', result, re.M)
2580
2581
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """ Determine if a board under test is supported for video glitch
    detection tests.

    A board is supported when client/cros/video/device_spec.conf under
    the autotest directory has a section named after the board (the
    board label without ds_constants.BOARD_PREFIX).

    @return: 'video_glitch_detection' if board is supported, None otherwise.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
        common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # Something went wrong while parsing the conf file. The base
        # exception class is ConfigParser.Error; the module has no
        # attribute named 'error', so the old spelling raised
        # AttributeError instead of handling the parse failure.
        return None

    supported_boards = parser.sections()
    return 'video_glitch_detection' if dut in supported_boards else None
2604
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    Each device type is queried with inputcontrol; a type is reported
    when the command prints any output for it.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
        depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device for device in ('touchpad', 'touchscreen')
            if self.run(input_cmd % device).stdout]
2619
2620
mussa584b4462014-06-20 15:13:28 -07002621
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions
    in `_LABEL_FUNCTIONS`.

    A label function may return a single string, a list of strings, or
    a false value for "no label". A label function that raises is
    logged and skipped.

    @return A flat list of all labels the label functions produced.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            # Deliberately broad: one broken detector must not stop the
            # remaining labels from being collected.
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            continue
        if not label:
            continue
        # isinstance() instead of an exact type() comparison, so that
        # subclass instances are not silently dropped.
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, list):
            labels.extend(label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002643
2644
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    Looks up the root device with `rootdev -s -d` and reads that
    device's `removable` flag from /sys/block.

    @return: True if DUT is boot from usb.
    """
    device = self.run('rootdev -s -d').stdout.strip()
    flag_cmd = 'cat /sys/block/%s/removable' % os.path.basename(device)
    removable = int(self.run(flag_cmd).stdout.strip())
    return removable == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002654
2655
def read_from_meminfo(self, key):
    """Return the memory info from /proc/meminfo

    Greps /proc/meminfo on the host for `key` and parses the number
    out of the first matching text.

    @param key: meminfo requested

    @return the memory value as an int

    """
    meminfo = self.run('grep %s /proc/meminfo' % key).stdout.strip()
    logging.debug('%s', meminfo)
    # Prefer the number after the "Key:" separator, so that digits
    # inside the key itself (e.g. DirectMap4k) are not taken for the
    # value.
    value_match = re.search(r':\s*(\d+)', meminfo)
    if value_match:
        return int(value_match.group(1))
    # No "Key: value" shape found: fall back to the first number.
    return int(re.search(r'\d+', meminfo).group(0))