blob: be179cae190e1ab237fd87bd50f06991f431b518 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Gabe Blackb72f4fb2015-01-20 16:47:13 -080024from autotest_lib.client.common_lib.cros.graphite import autotest_es
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
Cheng-Yi Chiangf4104ff2014-12-23 19:39:01 +080028from autotest_lib.client.cros.audio import cras_utils
MK Ryu35d661e2014-09-25 17:44:10 -070029from autotest_lib.server import autoserv_parser
30from autotest_lib.server import autotest
31from autotest_lib.server import constants
32from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070033from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070034from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050035from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070036from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070037from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070038from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080039from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070040from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080041from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070042
43
beeps32a63082013-08-22 14:02:29 -070044try:
45 import jsonrpclib
46except ImportError:
47 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070048
Fang Dengd1c2b732013-08-20 12:59:46 -070049
class FactoryImageCheckerException(error.AutoservError):
    """Raised to report that an image is a factory image.

    Adds nothing beyond its base class, error.AutoservError.
    """
53
54
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a label-detecting function.

    The decorated function is appended to `label_function_list` and then
    handed back unchanged.  When `label` is truthy and a `label_list` was
    supplied, `label` is appended to `label_list` as well.

    @param label_function_list: List of label detecting functions to add
                                the decorated function to.
    @param label_list: List of detectable labels to add `label` to.
                       (Default: None)
    @param label: Label string that is detectable by the decorated
                  detection function. (Default: None)

    @returns The decorator to apply to a detection function.
    """
    def register(detector):
        """Record `detector` (and its label, if any) and return it.

        @param detector: The function to be added as a detector.
        """
        label_function_list.append(detector)
        if label_list is not None and label:
            label_list.append(label)
        return detector

    return register
73
74
Fang Deng0ca40e22013-08-27 17:47:44 -070075class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070076 """Chromium OS specific subclass of Host."""
77
78 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050079 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070080
Richard Barnette03a0c132012-11-05 12:40:35 -080081 # Timeout values (in seconds) associated with various Chrome OS
82 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070083 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -080084 # In general, a good rule of thumb is that the timeout can be up
85 # to twice the typical measured value on the slowest platform.
86 # The times here have not necessarily been empirically tested to
87 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070088 #
89 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -080090 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
91 # time to restart the network.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080092 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070093 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -080094 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -080095 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070096 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -080097 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -080098 # network.
beepsf079cfb2013-09-18 17:49:51 -070099 # INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnette84890bd2014-02-21 11:05:47 -0800100 # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
101 # includes powerwash.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700102
103 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800104 RESUME_TIMEOUT = 10
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +0800105 SHUTDOWN_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700106 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700107 USB_BOOT_TIMEOUT = 150
J. Richard Barnette7817b052014-08-28 09:47:29 -0700108 INSTALL_TIMEOUT = 480
Dan Shi2c88eed2013-11-12 10:18:38 -0800109 POWERWASH_BOOT_TIMEOUT = 60
Chris Sosab76e0ee2013-05-22 16:55:41 -0700110
J. Richard Barnette84890bd2014-02-21 11:05:47 -0800111 # REBOOT_TIMEOUT: How long to wait for a reboot.
112 #
Chris Sosab76e0ee2013-05-22 16:55:41 -0700113 # We have a long timeout to ensure we don't flakily fail due to other
114 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
Simran Basi1160e2c2013-10-04 16:00:24 -0700115 # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
116 # return from reboot' bug is solved.
117 REBOOT_TIMEOUT = 480
Chris Sosab76e0ee2013-05-22 16:55:41 -0700118
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800119 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
120 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
121 _USB_POWER_TIMEOUT = 5
122 _POWER_CYCLE_TIMEOUT = 10
123
beeps32a63082013-08-22 14:02:29 -0700124 _RPC_PROXY_URL = 'http://localhost:%d'
Christopher Wileydd181852013-10-10 19:56:58 -0700125 _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
Peter Qiu4410db72014-06-05 10:32:41 -0700126 # Set shutdown timeout to account for the time for restarting the UI.
127 _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800128
Richard Barnette82c35912012-11-20 10:09:10 -0800129 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
130 'rpm_recovery_boards', type=str).split(',')
131
132 _MAX_POWER_CYCLE_ATTEMPTS = 6
133 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
Fang Dengdeba14f2014-11-14 11:54:09 -0800134 _RPM_HOSTNAME_REGEX = ('chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
135 '-host(\d+)')
Gwendal Grignoua66f1d12014-12-03 10:07:26 -0800136 _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
137 "in_illuminance_input",
138 "in_illuminance0_raw",
139 "in_illuminance_raw",
140 "illuminance0_input"]
Richard Barnette82c35912012-11-20 10:09:10 -0800141 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
142 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800143 _DETECTABLE_LABELS = []
144 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
145 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700146
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800147 # Constants used in ping_wait_up() and ping_wait_down().
148 #
149 # _PING_WAIT_COUNT is the approximate number of polling
150 # cycles to use when waiting for a host state change.
151 #
152 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
153 # for arguments to the internal _ping_wait_for_status()
154 # method.
155 _PING_WAIT_COUNT = 40
156 _PING_STATUS_DOWN = False
157 _PING_STATUS_UP = True
158
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800159 # Allowed values for the power_method argument.
160
161 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
162 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
163 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
164 POWER_CONTROL_RPM = 'RPM'
165 POWER_CONTROL_SERVO = 'servoj10'
166 POWER_CONTROL_MANUAL = 'manual'
167
168 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
169 POWER_CONTROL_SERVO,
170 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800171
Simran Basi5e6339a2013-03-21 11:34:32 -0700172 _RPM_OUTLET_CHANGED = 'outlet_changed'
173
Dan Shi9cb0eec2014-06-03 09:04:50 -0700174 # URL pattern to download firmware image.
175 _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
176 'CROS', 'firmware_url_pattern', type=str)
beeps687243d2013-07-18 15:29:27 -0700177
MK Ryu35d661e2014-09-25 17:44:10 -0700178 # File that has a list of directories to be collected
179 _LOGS_TO_COLLECT_FILE = os.path.join(
180 common.client_dir, 'common_lib', 'logs_to_collect')
181
182 # Prefix of logging message w.r.t. crash collection
183 _CRASHLOGS_PREFIX = 'collect_crashlogs'
184
185 # Time duration waiting for host up/down check
186 _CHECK_HOST_UP_TIMEOUT_SECS = 15
187
188 # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
189 # might not be completely done deep through the hardware when the machine
190 # is powered down right after the command returns.
191 # We should wait for a few seconds to make them done. Fingers crossed.
192 _SAFE_WAIT_SECS = 10
193
194
@staticmethod
def check_host(host, timeout=10):
    """Decide whether the given host is a chrome-os host.

    A single shell command is run on the host.  It succeeds only when
    /etc/lsb-release mentions CHROMEOS, `which adb` finds nothing, and
    /etc/lsb-release does not mention moblab.

    @param host: An ssh host representing a device.
    @param timeout: The timeout for the run command.

    @return: True if the command exits with status 0.  False if it exits
             with any other status, or if running it raises
             AutoservRunError or AutoservSSHTimeout.
    """
    probe_cmd = ('grep -q CHROMEOS /etc/lsb-release && '
                 '! which adb >/dev/null 2>&1 && '
                 '! grep -q moblab /etc/lsb-release')
    try:
        outcome = host.run(probe_cmd, ignore_status=True, timeout=timeout)
    except (error.AutoservRunError, error.AutoservSSHTimeout):
        return False
    else:
        return outcome.exit_status == 0
214
215
@staticmethod
def _extract_arguments(args_dict, key_subset):
    """Return the entries of `args_dict` whose keys are in `key_subset`.

    Takes the provided dictionary of argument options and returns the
    subset that represents the standard arguments needed to construct
    a test-assistant object (chameleon or servo) for a host.  The
    intent is to provide standard argument processing from
    run_remote_tests for tests that require a test-assistant board
    to operate.

    Keys listed in `key_subset` but absent from `args_dict` are simply
    left out of the result.

    @param args_dict Dictionary from which to extract the arguments.
    @param key_subset Tuple of keys to extract from the args_dict, e.g.
                      ('servo_host', 'servo_port').

    @returns A new dictionary holding the selected key/value pairs.
    """
    return dict((key, args_dict[key])
                for key in key_subset
                if key in args_dict)
236
237
@staticmethod
def get_chameleon_arguments(args_dict):
    """Pick the chameleon options out of `args_dict`.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
        host = hosts.create_host(machine, chameleon_args=chameleon_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the chameleon
                     arguments.

    @returns A dictionary with whichever of 'chameleon_host' and
             'chameleon_port' are present in `args_dict`.
    """
    chameleon_keys = ('chameleon_host', 'chameleon_port')
    return CrosHost._extract_arguments(args_dict, chameleon_keys)
254
255
@staticmethod
def get_servo_arguments(args_dict):
    """Pick the servo options out of `args_dict`.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
                     arguments.

    @returns A dictionary with whichever of 'servo_host' and
             'servo_port' are present in `args_dict`.
    """
    servo_keys = ('servo_host', 'servo_port')
    return CrosHost._extract_arguments(args_dict, servo_keys)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700272
J. Richard Barnette964fba02012-10-24 17:34:29 -0700273
def _initialize(self, hostname, chameleon_args=None, servo_args=None,
                try_lab_servo=False, ssh_verbosity_flag='', ssh_options='',
                *args, **dargs):
    """Initialize superclasses, |self.chameleon|, and |self.servo|.

    This method will attempt to create the test-assistant objects
    (chameleon/servo) when they are needed by the test.  Check the
    docstrings of chameleon_host.create_chameleon_host and
    servo_host.create_servo_host for how this is determined.

    @param hostname: Hostname of the dut.
    @param chameleon_args: A dictionary that contains args for creating
                           a ChameleonHost. See chameleon_host for details.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object. See servo_host for details.
    @param try_lab_servo: Boolean, False indicates that ServoHost should
                          not be created for a device in Cros test lab.
                          See servo_host for details.
    @param ssh_verbosity_flag: String, to pass to the ssh command to control
                               verbosity.
    @param ssh_options: String, other ssh options to pass to the ssh
                        command.
    """
    super(CrosHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env holds the environment variable settings exported for
    # commands run on the host.  LIBC_FATAL_STDERR_ can be useful for
    # diagnosing certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options

    # TODO(fdeng): We need to simplify the process of servo and
    # servo_host initialization.  crbug.com/298432
    self._servo_host = servo_host.create_servo_host(
            dut=self.hostname,
            servo_args=servo_args,
            try_lab_servo=try_lab_servo)
    # TODO(waihong): Do the simplification on Chameleon too.
    self._chameleon_host = chameleon_host.create_chameleon_host(
            dut=self.hostname,
            chameleon_args=chameleon_args)

    # Both assistant hosts are created before either assistant object
    # is requested from them.
    self.servo = (self._servo_host.get_servo()
                  if self._servo_host is not None else None)
    self.chameleon = (self._chameleon_host.create_chameleon_board()
                      if self._chameleon_host else None)
Fang Deng5d518f42013-08-02 14:04:32 -0700326
327
def get_repair_image_name(self):
    """Generate an image name for repairing this DUT.

    The board comes from self._get_board_from_afe(), the stable version
    from the AFE 'get_stable_version' RPC, and the two are substituted
    into the 'stable_build_pattern' value of the CROS section of the
    global config.

    @returns a str of $board-version/$BUILD.

    @raises AutoservError if the DUT has no board attribute.
    """
    dut_board = self._get_board_from_afe()
    if dut_board is None:
        raise error.AutoservError(
                'DUT has no board attribute, cannot be repaired.')

    version = self._AFE.run('get_stable_version', board=dut_board)
    pattern = global_config.global_config.get_config_value(
            'CROS', 'stable_build_pattern')
    return pattern % (dut_board, version)
342
343
def _host_in_AFE(self):
    """Check if the host is an object the AFE knows.

    @returns the result of asking the AFE for hosts named
             self.hostname; callers in this class treat a falsy
             result as "not known to the AFE".
    """
    matching_hosts = self._AFE.get_hosts(hostname=self.hostname)
    return matching_hosts
350
351
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    The AFE is queried once, through self._host_in_AFE(); the hosts it
    returns are reused instead of issuing the same get_hosts query a
    second time.

    @returns job_repo_url from AFE, or None if the AFE does not know
             this host or the host has no job_repo_url attribute.
    """
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700365
366
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise the
    host is removed from every label on it whose name starts with the
    version prefix, and its job_repo_url attribute is reset to None.
    """
    if self._host_in_AFE():
        this_host = [self.hostname]
        version_labels = self._AFE.get_labels(
                name__startswith=ds_constants.VERSION_PREFIX,
                host__hostname=self.hostname)
        for version_label in version_labels:
            version_label.remove_hosts(hosts=this_host)

        self.update_job_repo_url(None, None)
381
382
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    When either argument is falsy the attribute is set to None.  After
    the update the attribute is read back and compared against the
    value that was written.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        wanted_url = tools.get_package_url(devserver_url, image_name)
    else:
        wanted_url = None

    self._AFE.set_host_attribute(
            ds_constants.JOB_REPO_URL, wanted_url, hostname=self.hostname)

    stored_url = self.lookup_job_repo_url()
    if stored_url != wanted_url:
        raise error.AutoservError(
                'Failed to update job_repo_url with %s, host %s' %
                (wanted_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500400
401
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise a
    devserver is resolved for the image, the version label for the
    image is looked up (and created if it does not exist yet), the host
    is added to that label, and job_repo_url is updated.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    version_label_name = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=version_label_name)
    version_label = (existing[0] if existing
                     else self._AFE.create_label(name=version_label_name))

    version_label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
423
424
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0.  This method looks
    up the host's job_repo_url, splits it into a devserver url and an
    image name, and asks that devserver to stage 'autotest_packages'
    for the image.  The time the staging call took is then reported as
    a 'verify_job_repo_url' gauge, provided the image name can be
    parsed into board, build type and branch.

    If the host has no job_repo_url, a warning is logged and nothing
    else happens.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Not used by the code in this method.

    NOTE(review): earlier documentation listed DevServerException,
    AutoservError and urllib2.URLError as possible exceptions; none is
    raised directly here, so they would have to come from the helpers
    that are called -- confirm against those helpers.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    url_parts = tools.get_devserver_build_from_package_url(job_repo_url)
    devserver_url, image_name = url_parts

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    staging_began = time.time()
    ds.stage_artifacts(image_name, ['autotest_packages'])
    stage_time = time.time() - staging_began

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        parsed = server_utils.ParseBuildName(image_name)[:3]
        board, build_type, branch = parsed
    except server_utils.ParseBuildNameException:
        # Without a parseable build name there is no stats key to
        # report under.
        return

    # Keep what lies between the two characters following the first
    # '/' (the '//' after the scheme) and the last ':' (the port
    # separator).
    host_begin = devserver_url.find('/') + 2
    host_end = devserver_url.rfind(':')
    devserver = devserver_url[host_begin:host_end]

    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        'devserver': devserver.replace('.', '_'),
    }
    gauge = autotest_stats.Gauge('verify_job_repo_url')
    gauge.send(
            '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
            stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700486
Scott Zawalskieadbf702013-03-14 09:23:06 -0400487
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    Before the update a marker file is created in each folder that the
    stateful update is expected to rebuild.  After the update and a
    reboot, any marker that still exists shows that its folder was not
    rebuilt, and the stateful update is reported as failed.

    @param update_url: url of the image.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    # TODO(jrbarnette): Yes, I hate this re.match() test case.
    # It's better than the alternative: see crbug.com/360944.
    image_name = autoupdater.url_to_image_name(update_url)
    release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
    if not re.match(release_pattern, image_name):
        return False
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False

    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    # `test -f` must be the last command run so that its own exit
    # status is what comes back: 0 means the marker survived.  (A
    # trailing `; echo $?` would make the status always 0.)
    for marker_path in marker_paths:
        result = self.run('test -f %s' % marker_path, ignore_status=True)
        if result.exit_status == 0:
            logging.info('Stateful update left %s in place.', marker_path)
            return False
    return True
538
539
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.

    @raises autoupdater.ChromiumOSError if the build found on the DUT
        is not the one the updater was asked to install.  The boot
        expectation check is delegated to the updater and presumably
        reports a rollback using the message passed to it -- confirm
        against autoupdater.

    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # The two literals are joined before formatting; the trailing space
    # keeps "previous" and "build" apart in the final message.
    rollback_message = (
            'Build %s failed to boot on %s; system rolled back to previous '
            'build' % (updater.update_version, self.hostname))
    updater.verify_boot_expectations(
            expected_kernel, rollback_message=rollback_message)

    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        raise autoupdater.ChromiumOSError(
                'Failed to update %s to build %s; found build '
                '%s instead' % (self.hostname,
                                updater.update_version,
                                updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800565
566
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    When no image name is given, the repair image name from
    get_repair_image_name() is used.  The download on the devserver is
    triggered with synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name if image_name else self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)

    image_server = dev_server.ImageServer.resolve(build)
    image_server.trigger_download(build, synchronous=False)

    url_pattern = tools.image_url_pattern()
    return url_pattern % (image_server.url(), build)
580
581
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    When no image name is given, the repair image name from
    get_repair_image_name() is used.  The 'test_image' artifact is
    staged on the devserver resolved for the build.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns the URL the devserver reports for the test image of
        the build (devserver.get_test_image_url).
    """
    build = image_name if image_name else self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)

    image_server = dev_server.ImageServer.resolve(build)
    image_server.stage_artifacts(build, ['test_image'])
    return image_server.get_test_image_url(build)
595
596
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory build on a devserver and return its URL.

    The name of the artifact to stage is read from the
    'factory_artifact' value in the CROS section of the global config.

    @param image_name: a name like <board>/4262.204.0

    @return: An update URL, eg:
        http://<devserver>/static/canary-channel/
        <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
        None is returned (after logging an error) when `image_name`
        is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if image_name:
        artifact_name = global_config.global_config.get_config_value(
                'CROS', 'factory_artifact', type=str, default='')
        if not artifact_name:
            raise ValueError('Cannot retrieve the factory artifact name from '
                             'autotest config, and hence cannot stage factory '
                             'artifacts.')

        logging.info('Staging build for servo install: %s', image_name)
        image_server = dev_server.ImageServer.resolve(image_name)
        image_server.stage_artifacts(
                image_name, [artifact_name], archive_url=None)

        url_pattern = tools.factory_image_url_pattern()
        return url_pattern % (image_server.url(), image_name)

    logging.error('Need an image_name to stage a factory image.')
    return
629
630
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False,
                    force_full_update=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).  Both are added back only after a successful
    update.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None, the build given by
            self._parser.options.image is used when set (as-is if it
            starts with 'http://', otherwise it is staged on a
            devserver first).
            If update_url is None and repair is True we will install the
            stable image listed in afe_stable_versions table. If the table
            is not setup, global_config value under CROS.stable_cros_version
            will be used instead.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @param force_full_update: If True, do not attempt to run stateful
            update, force a full reimage. If False, try stateful update
            first when the dut is already installed with the same version.
    @raises autoupdater.ChromiumOSError if no update URL can be
            determined, or if after a full update the inactive kernel
            has lower priority than the active one.

    """
    # Step 1: work out which URL to update from.
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            # No explicit build: fall back to the repair image.
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot
        # the machine before trying to repair.
        #
        # If Chrome is crashing, the ui-respawn job may reboot
        # the DUT to try and "fix" it.  Guard against that
        # behavior by stopping the 'ui' job.
        #
        # If Chrome failed to start, update-engine won't be running,
        # so restart it by force.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop ui || true')
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    if not force_full_update:
        try:
            updated = self._try_stateful_update(
                    update_url, force_update, updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as e:
            # Any stateful-update failure is logged and then handled by
            # falling through to the full update below.
            logging.exception(e)
            logging.warning('Failed to stateful update DUT, force to update.')

    # Stays None unless a full update runs; passed on to
    # _post_update_processing() below.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('mkdir /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        # Restore the labels/attribute cleared before the update, now
        # pointing at the freshly installed image.
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    logging.debug('Cleaning up old autotest directories.')
    try:
        installed_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + installed_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700761
762
def _clear_fw_version_labels(self):
    """Detach every firmware version label from this host in the AFE."""
    fw_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    this_host = self.hostname
    for fw_label in fw_labels:
        fw_label.remove_hosts(hosts=[this_host])
770
771
def _add_fw_version_label(self, build):
    """Attach the firmware version label for a build to this host.

    The label is created through provision.ensure_label_exists() before
    it is looked up in the AFE.

    @param build: Build of firmware.

    """
    label_name = provision.fw_version_to_label(build)
    provision.ensure_label_exists(label_name)
    matching_labels = self._AFE.get_labels(name__startswith=label_name)
    matching_labels[0].add_hosts([self.hostname])
782
783
def firmware_install(self, build=None):
    """Install firmware to the DUT through servo.

    The firmware artifact of the build is staged on a devserver,
    downloaded and unpacked into a temporary directory, and then the EC
    and AP (BIOS) images are programmed through servo.

    Once a host enters the programming step its fw_version label will be
    removed. After the firmware is updated successfully, a new fw_version
    label for `build` will be added to the host.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.
                  If not set, the repair image name from
                  self.get_repair_image_name() is used.

    @raises error.TestError if the host has no servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else: # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # The dts/ files are extracted on a best-effort basis: a failure
        # of this command is ignored (ignore_status=True).
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # _add_fw_version_label() requires the build; calling it without
        # an argument raised TypeError after every successful flash and
        # left the host without a fw_version label.
        self._add_fw_version_label(build)
    finally:
        # Always remove the temporary download directory.
        tmpd.clean()
850
851
def show_update_engine_log(self):
    """Dump the update engine log of the DUT by running `cat` on it."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
Dan Shi10e992b2013-08-30 11:02:59 -0700856
857
def _get_board_from_afe(self):
    """Look up this host's board from its AFE labels.

    Delegates to server_utils.get_board_from_afe().  The board is taken
    from a host label of the form "board:<board>"; `None` is returned
    if there is not a single, unique label matching the pattern.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800868
869
def get_build(self):
    """Look up the current build of this Host from its AFE labels.

    Delegates to server_utils.get_build_from_afe().

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800879
880
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500881 def _install_repair(self):
882 """Attempt to repair this host using upate-engine.
883
884 If the host is up, try installing the DUT with a stable
Dan Shi6964fa52014-12-18 11:04:27 -0800885 "repair" version of Chrome OS as defined in afe_stable_versions table.
886 If the table is not setup, global_config value under
887 CROS.stable_cros_version will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500888
Scott Zawalski62bacae2013-03-05 10:40:32 -0500889 @raises AutoservRepairMethodNA if the DUT is not reachable.
890 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500891
892 """
893 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500894 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500895 logging.info('Attempting to reimage machine to repair image.')
896 try:
897 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700898 except autoupdater.ChromiumOSError as e:
899 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500900 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500901 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500902
903
Dan Shi2c88eed2013-11-12 10:18:38 -0800904 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800905 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800906
Dan Shi9cc48452013-11-12 12:39:26 -0800907 update-engine may fail due to a bad image. In such case, powerwash
908 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800909
910 @raises AutoservRepairMethodNA if the DUT is not reachable.
911 @raises ChromiumOSError if the install failed for some reason.
912
913 """
914 if not self.is_up():
915 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
916
917 logging.info('Attempting to powerwash the DUT.')
918 self.run('echo "fast safe" > '
919 '/mnt/stateful_partition/factory_install_reset')
920 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
921 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800922 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800923 'reboot.')
924 raise error.AutoservRepairFailure(
925 'DUT failed to boot from powerwash after %d seconds' %
926 self.POWERWASH_BOOT_TIMEOUT)
927
928 logging.info('Powerwash succeeded.')
929 self._install_repair()
930
931
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """
    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    After the install the DUT is halted, power cycled through servo with
    the USB key switched off, and expected to come back up within
    self.BOOT_TIMEOUT seconds.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.

    """
    # The timeout value is part of the stats key, so timings taken with
    # different timeouts are recorded under different keys.
    usb_boot_timer_key = ('servo_install.usb_boot_timeout_%s'
                          % usb_boot_timeout)
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    timer = autotest_stats.Timer(usb_boot_timer_key)
    timer.start()
    self.servo.install_recovery_image(image_url)
    if not self.wait_up(timeout=usb_boot_timeout):
        # NOTE(review): the timer is not stopped on this failure path.
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    timer.stop()

    install_timer_key = ('servo_install.install_timeout_%s'
                         % install_timeout)
    timer = autotest_stats.Timer(install_timer_key)
    timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes --lab_preserve_logs=%s' %
             self._LOGS_TO_COLLECT_FILE,
             timeout=install_timeout)
    self.run('halt')
    timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.get_power_state_controller().power_off()
    # Switch the USB key off before powering the DUT back on.
    self.servo.switch_usbkey('off')
    # N.B. The Servo API requires that we use power_on() here
    # for two reasons:
    #  1) After turning on a DUT in recovery mode, you must turn
    #     it off and then on with power_on() once more to
    #     disable recovery mode (this is a Parrot specific
    #     requirement).
    #  2) After power_off(), the only way to turn on is with
    #     power_on() (this is a Storm specific requirement).
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    if not self.wait_up(timeout=self.BOOT_TIMEOUT):
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
994
995
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700996 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500997 """Reinstall the DUT utilizing servo and a test image.
998
999 Re-install the OS on the DUT by:
1000 1) installing a test image on a USB storage device attached to the Servo
1001 board,
1002 2) booting that image in recovery mode, and then
1003 3) installing the image with chromeos-install.
1004
Scott Zawalski62bacae2013-03-05 10:40:32 -05001005 @raises AutoservRepairMethodNA if the device does not have servo
1006 support.
1007
1008 """
1009 if not self.servo:
1010 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
1011 'DUT has no servo support.')
1012
1013 logging.info('Attempting to recovery servo enabled device with '
1014 'servo_repair_reinstall')
1015
J. Richard Barnettee4af8b92013-05-01 13:16:12 -07001016 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001017 self.servo_install(image_url)
1018
1019
1020 def _servo_repair_power(self):
1021 """Attempt to repair DUT using an attached Servo.
1022
1023 Attempt to power on the DUT via power_long_press.
1024
1025 @raises AutoservRepairMethodNA if the device does not have servo
1026 support.
1027 @raises AutoservRepairFailure if the repair fails for any reason.
1028 """
1029 if not self.servo:
1030 raise error.AutoservRepairMethodNA('Repair Power NA: '
1031 'DUT has no servo support.')
1032
1033 logging.info('Attempting to recover servo enabled device by '
1034 'powering it off and on.')
1035 self.servo.get_power_state_controller().power_off()
1036 self.servo.get_power_state_controller().power_on()
1037 if self.wait_up(self.BOOT_TIMEOUT):
1038 return
1039
1040 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001041
1042
Richard Barnette82c35912012-11-20 10:09:10 -08001043 def _powercycle_to_repair(self):
1044 """Utilize the RPM Infrastructure to bring the host back up.
1045
1046 If the host is not up/repaired after the first powercycle we utilize
1047 auto fallback to the last good install by powercycling and rebooting the
1048 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001049
1050 @raises AutoservRepairMethodNA if the device does not support remote
1051 power.
1052 @raises AutoservRepairFailure if the repair fails for any reason.
1053
Richard Barnette82c35912012-11-20 10:09:10 -08001054 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001055 if not self.has_power():
1056 raise error.AutoservRepairMethodNA('Device does not support power.')
1057
Richard Barnette82c35912012-11-20 10:09:10 -08001058 logging.info('Attempting repair via RPM powercycle.')
1059 failed_cycles = 0
1060 self.power_cycle()
1061 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1062 failed_cycles += 1
1063 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001064 raise error.AutoservRepairFailure(
1065 'Powercycled host %s %d times; device did not come back'
1066 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001067 self.power_cycle()
1068 if failed_cycles == 0:
1069 logging.info('Powercycling was successful first time.')
1070 else:
1071 logging.info('Powercycling was successful after %d failures.',
1072 failed_cycles)
1073
1074
MK Ryu35d661e2014-09-25 17:44:10 -07001075 def _reboot_repair(self):
1076 """SSH to this host and reboot."""
1077 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1078 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1079 logging.info('Attempting repair via SSH reboot.')
1080 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1081
1082
def check_device(self):
    """Ssh-ping the device, then clean it up and verify it.

    The three steps run in that order; the first one that raises stops
    the sequence.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    for step_name in ('ssh_ping', 'cleanup', 'verify'):
        getattr(self, step_name)()
1096
1097
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    It first verifies and repairs servo if it is a DUT in CrOS
    lab and a servo is attached.

    This escalates in order through the following procedures and verifies
    the status using `self.check_device()` after each of them. This is done
    until both the repair and the verifying step succeed.

    Escalation order of repair procedures from less intrusive to
    more intrusive repairs:
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try to re-install to a known stable
         image using auto-update.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.check_device()`; If that call fails the
    exception it raises is passed back to the caller.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.
    @raises ServoHostRepairTotalFailure if the repair process fails to
            fix the servo host if one is attached to the DUT.
    @raises AutoservSshPermissionDeniedError if it is unable
            to ssh to the servo host due to permission error.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as e:
            # A failed servo host repair is only logged here; the DUT
            # repair below still runs.
            logging.error('Could not create a healthy servo: %s', e)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._reboot_repair,
                    self._servo_repair_power,
                    self._powercycle_to_repair,
                    self._install_repair,
                    self._install_repair_with_powerwash,
                    self._servo_repair_reinstall]
    # Messages of every failed or not-applicable attempt, reported
    # together if all procedures fail.
    errors = []
    board = self._get_board_from_afe()
    for repair_func in repair_funcs:
        try:
            repair_func()
            self.try_collect_crashlogs()
            # A repair only counts as successful if the device also
            # passes check_device(); an exception from it is handled by
            # the except clauses below like a failed repair.
            self.check_device()
            autotest_stats.Counter(
                    '%s.SUCCEEDED' % repair_func.__name__).increment()
            if board:
                autotest_stats.Counter(
                        '%s.%s.SUCCEEDED' % (repair_func.__name__,
                                             board)).increment()
            return
        except error.AutoservRepairMethodNA as e:
            # The procedure does not apply to this DUT; counted
            # separately from real failures.
            autotest_stats.Counter(
                    '%s.RepairNA' % repair_func.__name__).increment()
            if board:
                autotest_stats.Counter(
                        '%s.%s.RepairNA' % (repair_func.__name__,
                                            board)).increment()
            logging.warning('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            autotest_stats.Counter(
                    '%s.FAILED' % repair_func.__name__).increment()
            if board:
                autotest_stats.Counter(
                        '%s.%s.FAILED' % (repair_func.__name__,
                                          board)).increment()
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    # Every procedure was tried and none left the device healthy.
    autotest_stats.Counter('Full_Repair_Failed').increment()
    if board:
        autotest_stats.Counter(
                'Full_Repair_Failed.%s' % board).increment()
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001197
1198
def try_collect_crashlogs(self, check_host_up=True):
    """
    Collect crash logs from the host when it is flagged as needing them.

    Nothing is collected unless self._need_crash_logs() returns a job.
    Any exception raised on the way is logged and suppressed.

    @param check_host_up: When True (the default), logs are collected
            only if the host is up.  When False, that check is skipped.
    """
    prefix = self._CRASHLOGS_PREFIX
    try:
        crashed_job = self._need_crash_logs()
        if not crashed_job:
            return
        logging.debug('%s: Job %s was crashed', prefix, crashed_job)

        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return
        self._collect_crashlogs(crashed_job)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', prefix, crashed_job)
    except Exception as e:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, e)
1222
1223
def _need_crash_logs(self):
    """Read the need_crash_logs attribute of this host from the AFE.

    At most one such attribute is expected for the host; this is
    enforced with an assert.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    matches = self._AFE.get_host_attribute(
            constants.CRASHLOGS_HOST_ATTRIBUTE, hostname=self.hostname)
    assert len(matches) < 2
    if not matches:
        return None
    return matches[0].value
1234
1235
def _collect_crashlogs(self, job_id):
    """Grab logs from the host where a job was crashed.

    First, check if PRIOR_LOGS_DIR exists in the host.
    If yes, collect them.
    Otherwise, check if a lab-machine marker (_LAB_MACHINE_FILE) exists
    in the host.
    If yes, the host was repaired automatically, and we collect normal
    system logs.

    Afterwards a marker file holding the job id is written into the
    crash-logs directory, the need_crash_logs host attribute is cleared,
    and PRIOR_LOGS_DIR is removed from the host if it was collected.

    @param job_id: Id of the job that was crashed.
    """
    crashlogs_dir = crashcollect.get_crashinfo_dir(self,
            constants.CRASHLOGS_DEST_DIR_PREFIX)
    flag_prior_logs = False

    if self.path_exists(client_constants.PRIOR_LOGS_DIR):
        flag_prior_logs = True
        self._collect_prior_logs(crashlogs_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(crashlogs_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # We make crash collection be one-time effort.
    # _collect_prior_logs() and _collect_system_logs() will not throw
    # any exception, and following codes will be executed even when
    # those methods fail.
    # _collect_crashlogs() is called only when the host is up (refer
    # to try_collect_crashlogs()). We assume _collect_prior_logs() and
    # _collect_system_logs() fail rarely when the host is up.
    # In addition, it is not clear how many times we should try crash
    # collection again while not triggering next repair unnecessarily.
    # Therefore, we try crash collection one time.

    # Create a marker file as soon as log collection is done.
    # Leave the job id to this marker for gs_offloader to consume.
    marker_file = os.path.join(crashlogs_dir, constants.CRASHLOGS_MARKER)
    with open(marker_file, 'a') as f:
        f.write('%s\n' % job_id)

    # Remove need_crash_logs attribute
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if flag_prior_logs:
        logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                      client_constants.PRIOR_LOGS_DIR, self.hostname)
        self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
        # Wait for a few seconds to make sure the prior command is
        # done deep through storage.
        time.sleep(self._SAFE_WAIT_SECS)
1292
1293
def _collect_prior_logs(self, crashlogs_dir):
    """Grab prior logs that were stashed before re-installing a host.

    Failures are logged and not propagated to the caller.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    logging.debug('%s: Found %s, collecting them...',
                  self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR)
    try:
        self.collect_logs(client_constants.PRIOR_LOGS_DIR,
                          crashlogs_dir, False)
        logging.debug('%s: %s is collected',
                      self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR)
    except Exception as e:
        # Best effort: log collection must not abort the caller.
        logging.error('%s: Failed to collect %s: %s',
                      self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR,
                      e)
1310
1311
def _collect_system_logs(self, crashlogs_dir):
    """Grab normal system logs from a host.

    The list of sources is read from _LOGS_TO_COLLECT_FILE.  Each source
    that exists on the host is collected independently; a failure on one
    source is logged and does not stop the remaining ones.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    logging.debug('%s: Found %s, collecting system logs...',
                  self._CRASHLOGS_PREFIX, self._LAB_MACHINE_FILE)
    sources = server_utils.parse_simple_config(self._LOGS_TO_COLLECT_FILE)
    for src in sources:
        try:
            if self.path_exists(src):
                logging.debug('%s: Collecting %s...',
                              self._CRASHLOGS_PREFIX, src)
                dest = server_utils.concat_path_except_last(
                        crashlogs_dir, src)
                self.collect_logs(src, dest, False)
                logging.debug('%s: %s is collected',
                              self._CRASHLOGS_PREFIX, src)
        except Exception as e:
            # Best effort: keep going with the next source.
            logging.error('%s: Failed to collect %s: %s',
                          self._CRASHLOGS_PREFIX, src, e)
1333
1334
def close(self):
    """Disconnect all RPC proxies, then run the parent class close()."""
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001338
1339
def get_power_supply_info(self):
    """Get the output of power_supply_info.

    power_supply_info outputs the info of each power supply, e.g.,
    Device: Line Power
      online: no
      type: Mains
      voltage (V): 0
      current (A): 0
    Device: Battery
      state: Discharging
      percentage: 95.9276
      technology: Li-ion

    Above output shows two devices, Line Power and Battery, with details of
    each device listed. This function parses the output into a dictionary,
    with key being the device name, and value being a dictionary of details
    of the device info.

    Lines without a colon are ignored.  Each line is split on its first
    colon only, so a value that itself contains a colon is kept intact.

    @return: The dictionary of power_supply_info, e.g.,
             {'Line Power': {'online': 'yes', 'type': 'main'},
              'Battery': {'vendor': 'xyz', 'percentage': '100'}}
    @raise error.AutoservRunError if power_supply_info tool is not found in
           the DUT. Caller should handle this error to avoid false failure
           on verification.
    """
    result = self.run('power_supply_info').stdout.strip()
    info = {}
    device_name = None
    device_info = {}
    for line in result.split('\n'):
        # Split on the first colon only; values may contain colons.
        pair = [v.strip() for v in line.split(':', 1)]
        if len(pair) != 2:
            continue
        if pair[0] == 'Device':
            # A new device section starts; store the previous one.
            if device_name:
                info[device_name] = device_info
            device_name = pair[1]
            device_info = {}
        else:
            device_info[pair[0]] = pair[1]
    # Store the last device section, which no 'Device' line follows.
    if device_name and device_name not in info:
        info[device_name] = device_info
    return info
1384
1385
def get_battery_percentage(self):
    """Get the battery percentage.

    @return: The 'percentage' value of the 'Battery' device reported by
             get_power_supply_info(), as a float. Return None if the
             battery info cannot be retrieved or parsed.
    """
    try:
        info = self.get_power_supply_info()
        logging.info(info)
        return float(info['Battery']['percentage'])
    except (KeyError, ValueError, error.AutoservRunError):
        return None
1398
1399
def is_ac_connected(self):
    """Check if the dut has power adapter connected and charging.

    @return: True if the 'Line Power' device reports online 'yes', False
             if it reports anything else, None if the state cannot be
             determined (no such entry, or power_supply_info failed).
    """
    try:
        info = self.get_power_supply_info()
        return info['Line Power']['online'] == 'yes'
    except (KeyError, error.AutoservRunError):
        return None
Dan Shi49ca0932014-11-14 11:22:27 -08001410
1411
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the host carries the _RPM_OUTLET_CHANGED
    attribute in the AFE.  If powering on through the RPM fails, the
    failure is re-raised only when the battery level is known and below
    50%; otherwise it is logged and suppressed.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts or not (self._RPM_OUTLET_CHANGED in
                         hosts[0].attributes):
        return
    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
        afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                               hostname=self.hostname)
    except rpm_client.RemotePowerException:
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
        autotest_es.post(
                type_str='RPM_poweron_failure',
                metadata={'hostname': self.hostname})

        battery_percentage = self.get_battery_percentage()
        # Compare against None explicitly so that a reading of 0%
        # is treated as a low battery rather than as "unknown".
        if battery_percentage is not None and battery_percentage < 50:
            raise
        elif self.is_ac_connected():
            logging.info('The device has power adapter connected and '
                         'charging. No need to try to turn RPM on '
                         'again.')
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        logging.info('Battery level is now at %s%%. The device may '
                     'still have enough power to run test, so no '
                     'exception will be raised.', battery_percentage)
1444
Simran Basi5e6339a2013-03-21 11:34:32 -07001445
def _is_factory_image(self):
    """Checks if the image on the DUT is a factory image.

    The check tests for the file /root/.factory_test on the DUT.

    @return: True if the image on the DUT is a factory image.
             False otherwise.
    """
    result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
    return result.exit_status == 0
1454
1455
def _restart_ui(self):
    """Restart the Chrome UI.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette):  The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).
    # The login prompt state is captured before the restart and handed
    # to wait_for_chrome_ready() afterwards.
    prompt = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(prompt, self)
beepsc87ff602013-07-31 21:53:00 -07001475
1476
def cleanup(self):
    """Clean up the host after a job.

    Removes CLEANUP_LOGS_PAUSED_FILE from the host and restarts the UI.
    If the UI cannot be restarted, falls back to the parent class
    cleanup().  Finally, if the host has_power(), makes sure its power
    is back on via _cleanup_poweron().
    """
    self.run('rm -f %s' % client_constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()
    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001490
1491
def reboot(self, **dargs):
    """
    This function reboots the site host. The more generic
    RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    Unless the caller supplies them, 'reboot_cmd' and 'fastsync' are
    filled in with Chrome OS specific defaults.  'board' is always set
    from get_board() before delegating to the parent class reboot().

    @param dargs: Keyword arguments forwarded to the parent reboot().
    """
    if 'reboot_cmd' not in dargs:
        reboot_timeout = dargs.get('reboot_timeout', 10)
        # Issue a normal reboot, then force one after reboot_timeout
        # seconds, all detached from the invoking shell.
        dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
                               ' </dev/null >/dev/null 2>&1 &)' %
                               reboot_timeout)
    # Enable fastsync to avoid running extra sync commands before reboot.
    if 'fastsync' not in dargs:
        dargs['fastsync'] = True

    # For purposes of logging reboot times:
    # Get the board name i.e. 'daisy_spring'
    board_fullname = self.get_board()

    # Strip the prefix and add it to dargs.
    dargs['board'] = board_fullname[board_fullname.find(':')+1:]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001516
1517
def suspend(self, **dargs):
    """
    This function suspends the site host.

    'timeout' is always set to the suspend time (dargs['suspend_time'],
    default 60).  Unless the caller supplies 'suspend_cmd', a command is
    built that programs the rtc0 wake alarm for that many seconds and
    then calls powerd_dbus_suspend.

    @param dargs: Keyword arguments forwarded to the parent suspend().
    """
    suspend_time = dargs.get('suspend_time', 60)
    dargs['timeout'] = suspend_time
    if 'suspend_cmd' not in dargs:
        cmd = ' && '.join(['echo 0 > /sys/class/rtc/rtc0/wakealarm',
                           'echo +%d > /sys/class/rtc/rtc0/wakealarm' %
                           suspend_time,
                           'powerd_dbus_suspend --delay=0 &'])
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' % cmd)
    super(CrosHost, self).suspend(**dargs)
1531
1532
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    # grep exits non-zero when nothing matches; ignore the exit status
    # so that a service that is not running yields False instead of an
    # exception from run().
    return self.run('status %s | grep start/running' % service_name,
                    ignore_status=True).stdout.strip() != ''
1542
1543
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. No crash-logs are pending collection for this host.
     2. All conditions tested by the parent version of this
        function.
     3. Sufficient inodes and space in /mnt/stateful_partition.
     4. Sufficient space in /mnt/stateful_partition/encrypted
        (only if that path exists).
     5. The 'system-services' upstart job is running.
     6. update_engine answers a simple status request over DBus
        (skipped on factory images).
     7. python on the DUT can import cPickle.

    @raises error.AutoservCrashLogCollectRequired if crash-logs need
            to be collected first.
    @raises error.AutoservError if system services are not running.
    """
    # Check if a job was crashed on this host.
    # If yes, avoid verification until crash-logs are collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()
    self.check_inodes(
        '/mnt/stateful_partition',
        global_config.global_config.get_config_value(
            'SERVER', 'kilo_inodes_required', type=int,
            default=100))
    self.check_diskspace(
        '/mnt/stateful_partition',
        global_config.global_config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float,
            default=20.0))
    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    # Not all targets build with encrypted stateful support.
    if self.path_exists(encrypted_stateful_path):
        self.check_diskspace(
            encrypted_stateful_path,
            global_config.global_config.get_config_value(
                'SERVER', 'gb_encrypted_diskspace_required', type=float,
                default=0.1))

    if not self.upstart_status('system-services'):
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001593
1594
def verify_hardware(self):
    """Verify hardware system of a Chrome OS system.

    Check following hardware conditions:
    1. Battery level.
    2. Is power adapter connected.

    Both results are only logged; nothing is raised from here.
    """
    logging.info('Battery percentage: %s', self.get_battery_percentage())
    # Query once so that the check and the message agree with each
    # other and no second query is issued.
    ac_connected = self.is_ac_connected()
    if ac_connected is None:
        logging.info('Can not determine if the device has power adapter '
                     'connected.')
    else:
        logging.info('Device %s power adapter connected and charging.',
                     'has' if ac_connected else 'does not have')
Dan Shi49ca0932014-11-14 11:22:27 -08001609
1610
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line as a string, without the target host.
    """
    base_command = ('/usr/bin/ssh -a -x %s %s %s'
                    ' -o StrictHostKeyChecking=no'
                    ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                    ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                    ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                    ' -o Protocol=2 -l %s -p %d')
    return base_command % (self._ssh_verbosity_flag, self._ssh_options,
                           opts, user, port)
Fang Deng96667ca2013-08-01 17:46:18 -07001651
1652
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = self.make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)
    return tunnel_proc
1675
1676
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Optional pid of the remote process; it is
                       recorded in _rpc_proxy_map together with the
                       command name and the tunnel process.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop any earlier connection to the same remote port first.
    self.rpc_disconnect(port)
    local_port = utils.get_unused_port()
    tunnel_proc = self._create_ssh_tunnel(port, local_port)
    self._rpc_proxy_map[port] = (command_name, tunnel_proc, remote_pid)
    return self._RPC_PROXY_URL % local_port
1712
1713
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready, retrying for up to timeout_seconds.  If the
    server still isn't ready by then, the connection is torn down
    again and the error from the retry wrapper propagates to the
    caller.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'  An error is raised
        if the server is not ready in time.
    @param logfile Logfile to send output when running
        'command' argument.

    @return The xmlrpclib.ServerProxy for the server.
    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        finally:
            # Tear the connection down on any failure, including
            # exceptions that the retry wrapper does not catch.
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001798
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001799
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    # NOTE(review): tag and message are interpolated into the shell
    # command unescaped; text containing double quotes or shell
    # metacharacters will not be logged verbatim.
    self.run('logger -t "%s" "%s"' % (tag, message))
1808
1809
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a remote
    jsonrpc server, as the only clients of this code are factory tests,
    for which the goofy system manager is built in to the image and starts
    when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is not available.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name is given: there is no remote server for us to
    # kill on disconnect.
    proxy = jsonrpclib.jsonrpc.ServerProxy(self._setup_rpc(port, None))

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1840
1841
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @raises error.TestError if the remote server is still running
            after _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    if port not in self._rpc_proxy_map:
        return
    remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)
        if remote_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         remote_name)
            start_time = time.time()
            while (time.time() - start_time <
                   self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                running_processes = self.run(
                        "pgrep -f '%s'" % remote_name,
                        ignore_status=True).stdout.split()
                if remote_pid not in running_processes:
                    logging.info('Shut down RPC server.')
                    break
                time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
            else:
                # The loop ran out of time without seeing the
                # process go away.
                raise error.TestError('Failed to shutdown RPC server %s' %
                                      remote_name)

    if tunnel_proc.poll() is None:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001896
1897
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() deletes entries from the map, so iterate over
    # a snapshot of the ports rather than over the map itself.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001902
1903
def poor_mans_rpc(self, fun):
    """
    Builds a shell command that calls a function from client utils.

    The command changes into the bin directory of the installed
    autotest client and runs a python one-liner that prints
    utils.<fun>.  The command is only built here, not executed;
    presumably the caller runs it on the host.

    @param fun function call expression in client utils namespace.
    @return the shell command string.
    """
    script = 'cd %s/bin; ' % autotest.Autotest.get_installed_autodir(self)
    script += 'python -c "import common; import utils;'
    script += 'print utils.%s"' % fun
    return script
1915
1916
def _ping_check_status(self, status):
    """Ping the host once and compare the outcome with `status`.

    @param status The expected ping status.
    @return True iff `status` and "the ping exited with 0" agree
            (i.e. both True or both False).

    """
    ping_exit_code = utils.ping(self.hostname, tries=1, deadline=1)
    host_answered = (ping_exit_code == 0)
    # XOR is true exactly when the two values disagree.
    return not (status ^ host_answered)
1927
1928 def _ping_wait_for_status(self, status, timeout):
1929 """Wait for the host to have a given status (UP or DOWN).
1930
1931 Status is checked by polling. Polling will not last longer
1932 than the number of seconds in `timeout`. The polling
1933 interval will be long enough that only approximately
1934 _PING_WAIT_COUNT polling cycles will be executed, subject
1935 to a maximum interval of about one minute.
1936
1937 @param status Waiting will stop immediately if `ping` of the
1938 host returns this status.
1939 @param timeout Poll for at most this many seconds.
1940 @return True iff the host status from `ping` matched the
1941 requested status at the time of return.
1942
1943 """
1944 # _ping_check_status() takes about 1 second, hence the
1945 # "- 1" in the formula below.
1946 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1947 end_time = time.time() + timeout
1948 while time.time() <= end_time:
1949 if self._ping_check_status(status):
1950 return True
1951 if poll_interval > 0:
1952 time.sleep(poll_interval)
1953
1954 # The last thing we did was sleep(poll_interval), so it may
1955 # have been too long since the last `ping`. Check one more
1956 # time, just to be sure.
1957 return self._ping_check_status(status)
1958
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable replacement for `wait_up()`: a host
    that answers ping does not necessarily answer ssh.  Use it only
    when the DUT can be considered functional even though it cannot be
    reached via ssh.

    @param timeout Minimum time to allow before declaring the host
                   non-responsive.
    @return True iff the host answered ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001974
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster variant of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001988
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, which
    cannot be told apart from a host that is powered off.  To confirm
    the unit really slept, force a resume afterwards and call
    `test_wait_for_resume()`.

    Expected usage from a test:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host to
                         sleep; defaults to SLEEP_TIMEOUT.

    @exception TestFail The host did not go to sleep within the
                        allowed time.
    """
    if sleep_timeout is None:
        timeout = self.SLEEP_TIMEOUT
    else:
        timeout = sleep_timeout

    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' % timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002019
2020
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value of `get_boot_id()` taken before
    the host went to sleep.  A changed boot id means the host rebooted
    instead of resuming, which is reported as a failure.

    See @ref test_wait_for_sleep for more on this function's usage.

    @param old_boot_id A boot id value obtained before the target
                       host went to sleep.
    @param resume_timeout time limit in seconds to allow the host to
                          come up; defaults to RESUME_TIMEOUT.

    @exception TestFail The host did not respond within the allowed
                        time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep cycle.
    """
    if resume_timeout is None:
        timeout = self.RESUME_TIMEOUT
    else:
        timeout = resume_timeout

    if not self.wait_up(timeout=timeout):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                timeout)

    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        return
    logging.error('client rebooted (old boot %s, new boot %s)',
                  old_boot_id, new_boot_id)
    raise error.TestFail('client rebooted, but sleep was expected')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002055
2056
def test_wait_for_shutdown(self, shutdown_timeout=None):
    """Wait for the client to shut down.

    "Shut down" is detected by the host no longer answering ping,
    which cannot be told apart from a host that is merely asleep.  To
    confirm the unit was down, force a boot afterwards and call
    test_wait_for_boot().

    Expected usage from a test:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @param shutdown_timeout time limit in seconds to allow the host to
                            go down; defaults to SHUTDOWN_TIMEOUT.
    @exception TestFail The host did not shut down within the allowed
                        time.
    """
    if shutdown_timeout is None:
        timeout = self.SHUTDOWN_TIMEOUT
    else:
        timeout = shutdown_timeout

    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' % timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002086
2087
def test_wait_for_boot(self, old_boot_id=None, boot_timeout=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value of `get_boot_id()` taken before
    shutting down.  A `TestFail` exception is raised if the boot id
    did not change.  The boot id test is omitted if `old_boot_id` is
    not specified.

    See @ref test_wait_for_shutdown for more on this function's usage.

    @param old_boot_id A boot id value obtained before the shut down.
    @param boot_timeout time limit in seconds to allow the host to
                        come up; defaults to REBOOT_TIMEOUT, so
                        existing callers are unaffected.

    @exception TestFail The host did not respond within the allowed
                        time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if boot_timeout is None:
        boot_timeout = self.REBOOT_TIMEOUT

    if not self.wait_up(timeout=boot_timeout):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                boot_timeout)

    # An unchanged boot id means the host never went down.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        logging.error('client not rebooted (boot %s)', old_boot_id)
        raise error.TestFail('client is back up, but did not reboot')
Simran Basid5e5e272012-09-24 15:23:59 -07002118
2119
2120 @staticmethod
def check_for_rpm_support(hostname):
    """Match a hostname against the RPM-powered DUT naming format.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not match
            CrosHost._RPM_HOSTNAME_REGEX; otherwise the regular
            expression match object.
    """
    rpm_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07002131
2132
def has_power(self):
    """Report whether this host's name follows the RPM naming format.

    @return whatever `CrosHost.check_for_rpm_support()` returns for
            this host's hostname: a truthy value when the name
            matches, None when it does not.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07002140
2141
def _set_power(self, state, power_method):
    """Set the power of the host via RPM, Servo or manual.

    @param state Power state to set on the DUT; 'ON' or 'OFF' in any
                 letter case.
    @param power_method Method of power control to use.  The servo
                        and manual methods are handled specially;
                        any other value goes through the RPM.

    @exception TestError `state` is not an acceptable state.
    @exception TestFail RPM control was requested but `has_power()`
                        reports no RPM for this host.

    """
    allowed_states = ['ON', 'OFF']
    requested = state.upper()
    if requested not in allowed_states:
        raise error.TestError('State must be one of: %s.'
                              % (allowed_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Only pauses here; a human is asked to flip the switch.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, requested, timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002172
2173
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    @param power_method Method of power control to use; defaults to
                        POWER_CONTROL_RPM.  Forwarded unchanged to
                        `_set_power()`.

    """
    self._set_power(state='OFF', power_method=power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002183
2184
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    @param power_method Method of power control to use; defaults to
                        POWER_CONTROL_RPM.  Forwarded unchanged to
                        `_set_power()`.

    """
    self._set_power(state='ON', power_method=power_method)
2194
2195
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the servo and manual methods the cycle is sequenced here:
    power off, pause for _POWER_CYCLE_TIMEOUT seconds, power on.  Any
    other method sends a single 'CYCLE' request to the RPM client.

    @param power_method Method of power control to use; defaults to
                        POWER_CONTROL_RPM.

    """
    locally_sequenced = (self.POWER_CONTROL_SERVO,
                         self.POWER_CONTROL_MANUAL)
    if power_method not in locally_sequenced:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002211
2212
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is the leading dot-separated component of the
    firmware id, lower-cased, with any 'google_' text removed.
    fwid generally looks like {platform}.{firmware version}, e.g.
    Alex.X.YYY.Z or Google_Alex.X.YYY.Z.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0].lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform_id.replace('google_', '')
2226
2227
def get_architecture(self):
    """Determine the correct architecture label for this host.

    @returns the value of `arch()` from an initialised
             `utils.Crossystem` for this host.
    """
    reader = utils.Crossystem(self)
    reader.init()
    architecture = reader.arch()
    return architecture
2236
2237
def get_chrome_version(self):
    """Get the Chrome version number and milestone as strings.

    Runs client_constants.CHROME_VERSION_COMMAND on the host and
    hands its output to `utils.parse_chrome_version()`.

    @return A tuple (chrome_ver, milestone): "chrome_ver" is the
            Chrome version as a string of the form "W.X.Y.Z" and
            "milestone" is its first component ("W").  If the output
            cannot be parsed in that format, "chrome_ver" is the full
            command output and the milestone is the empty string.

    """
    version_result = self.run(client_constants.CHROME_VERSION_COMMAND)
    return utils.parse_chrome_version(version_result.stdout)
2253
Aviv Keshet74c89a92013-02-04 15:18:30 -08002254 @label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    Reads CHROMEOS_RELEASE_BOARD from /etc/lsb-release and trims it.
    Lab devices generally report the plain board name, but development
    devices report {board_name}-signed-{key_type}.  A leading 'x86-'
    belongs to the board name and is kept.

    @returns a string representing this host's board, prefixed with
             ds_constants.BOARD_PREFIX.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    raw_board = lsb_release['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    parts = raw_board.split('-')
    # Keep two components for x86 boards, one for everything else.
    kept = parts[0:2] if 'x86' in raw_board else parts[0:1]
    return label_format % '-'.join(kept)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002270
2271
Ilja H. Friedel1232e8a2014-06-17 21:30:48 -07002272 @label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determine the board name with frequency and memory.

    Runs the command built by `poor_mans_rpc()` for
    get_board_with_frequency_and_memory() on the host.

    @returns 'board_freq_mem:' followed by the stripped command
             output.  Examples of the detail part are
             butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    command = self.poor_mans_rpc('get_board_with_frequency_and_memory()')
    raw_output = self.run(command).stdout
    return 'board_freq_mem:%s' % str.strip(raw_output)
2283
2284
Aviv Keshet74c89a92013-02-04 15:18:30 -08002285 @label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a light sensor.

    Searches _LIGHTSENSOR_SEARCH_DIR (following symlinks) for any of
    the file names in _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if the search command succeeds
             or None if it raises AutoservRunError.
    """
    wanted_files = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, wanted_files)
    try:
        # stderr_tee is None because following symlinks can hit a
        # symlink loop; the command still works but prints a few
        # messages to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
2304
2305
Aviv Keshet74c89a92013-02-04 15:18:30 -08002306 @label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Checks for the directory /sys/class/bluetooth/hci0 on the host.

    @returns the string 'bluetooth' if the check succeeds or None if
             it raises AutoservRunError.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did
        # not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
2321
2322
Ilja H. Friedel1232e8a2014-06-17 21:30:48 -07002323 @label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    Runs the command built by `poor_mans_rpc()` for get_gpu_family()
    on the host.

    @returns 'gpu_family:' followed by the stripped command output.
             Examples of families are mali, tegra, pinetrail,
             sandybridge, ivybridge, haswell and baytrail.
    """
    command = self.poor_mans_rpc('get_gpu_family()')
    raw_output = self.run(command).stdout
    return 'gpu_family:%s' % str.strip(raw_output)
2333
2334
Ilja Friedel0ce0b602013-08-15 18:45:27 -07002335 @label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    For now hosts whose `uname -a` output mentions 'arm' get
    graphics:gles and all other hosts get graphics:gl.  This may
    change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be
    ready and is tested by graphics_GLAPICheck).

    @returns 'graphics:gles' or 'graphics:gl'.
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2350
2351
Bill Richardson4f595f52014-02-13 16:20:26 -08002352 @label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns 'ec:cros' when `mosys ec info` succeeds and its output
    ends in a Chrome OS EC version string; None otherwise.  Other
    types of EC (or none) don't return any strings, since no tests
    depend on those.
    """
    cmd = 'mosys ec info'
    # The last field of the output should match our EC version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs will look like these:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # And some systems don't have ECs at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    version_match = re.search(regexp, ecinfo.stdout)
    if version_match is None:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", version_match.group(1))
    return 'ec:cros'
2389
2390
Alec Berg31b932b2014-04-04 16:09:11 -07002391 @label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns 'accel:cros-ec' for accelerometers attached to a Chrome
    OS EC, or None when ectool is missing, the EC rejects the
    motionsense command, or motion sensing reports itself inactive.
    """
    # Each probe must exit with status 0, otherwise we give up with
    # the paired explanation: first that ectool exists, then that the
    # EC supports the motionsense command.
    probes = (
        ('which ectool',
         "No ectool cmd found, assuming no EC accelerometers"),
        ('ectool motionsense',
         "EC does not support motionsense command "
         "assuming no EC accelerometers"),
    )
    for command, give_up_message in probes:
        if self.run(command, ignore_status=True).exit_status:
            logging.info(give_up_message)
            return None

    # Check that EC motion sensors are active
    activity = self.run('ectool motionsense active').stdout
    first_line = activity.split('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2421
2422
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +08002423 @label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if
             `_chameleon_host` is set, or None if it is not.
    """
    if not self._chameleon_host:
        return None
    specific_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', specific_label]
2435
2436
Cheng-Yi Chiangf4104ff2014-12-23 19:39:01 +08002437 @label_decorator('audio_loopback_dongle')
def has_loopback_dongle(self):
    """Determine if an audio loopback dongle is plugged to this host.

    A dongle is assumed when `cras_utils.node_type_is_plugged()`
    reports both a HEADPHONE and a MIC node as plugged in the output
    of `cras_test_client --dump_s`.

    @returns 'audio_loopback_dongle' when there is an audio loopback
             dongle plugged to this host, None when there is not.
    """
    server_info = self.run(command='cras_test_client --dump_s',
                           ignore_status=True).stdout
    required_nodes = ('HEADPHONE', 'MIC')
    if all(cras_utils.node_type_is_plugged(node, server_info)
           for node in required_nodes):
        return 'audio_loopback_dongle'
    return None
2453
2454
Derek Basehorec71ff622014-07-07 15:18:40 -07002455 @label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns 'power:' followed by the output of `mosys psu type`:
    'power:battery' when the device has a battery intended for
                    extended use
    'power:AC_primary' when the device has a battery not intended
                       for extended use (for moving the machine, etc)
    'power:AC_only' when the device has no battery at all.
    None when mosys reports the type as 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # The psu command for mosys is not included for all platforms.
        # The assumption is that the device will have a battery if the
        # command is not found.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2479
2480
Puthikorn Voravootivatfa011242014-03-14 18:45:11 -07002481 @label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = r'/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or
        # something else. We can assume that MMC is always an internal
        # device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Deliberately not named `type`, which would hide the builtin.
        type_result = self.run(command=type_cmd, ignore_status=True)
        if type_result.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = type_result.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = r'/dev/sd[a-z]+'
    # Match the stripped device name, as the mmc check does; re.match
    # anchors at the start, so stray leading whitespace in the raw
    # output would otherwise defeat the pattern.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = str('cat /sys/block/%s/queue/rotational'
                         % rootdev_base)
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2553
2554
Dan Shi4e9a2aa2014-03-24 14:28:42 -07002555 @label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    If the host has a working servo attached, it should have a servo
    label.  The decision is based on `_servo_host` being set.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2565
2566
Dan Shi5beba472014-05-28 22:46:07 -07002567 @label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect;
             an empty list if either command raises AutoservRunError.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
        # Raw string: '\w' in a plain literal is an invalid escape
        # sequence that newer interpreters warn about.
        return re.findall(r'^Detected label: (\w+)$', result, re.M)
    except error.AutoservRunError:
        # The tool is not installed.
        return []
2588
2589
mussa584b4462014-06-20 15:13:28 -07002590 @label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """Determine if a board under test is supported for video glitch
    detection tests.

    A board is supported when its name (the board label with
    ds_constants.BOARD_PREFIX removed) is a section of
    client/cros/video/device_spec.conf.

    @return: 'video_glitch_detection' if board is supported, None
             otherwise, including when the conf file cannot be parsed.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
            common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # Something went wrong while parsing the conf file.  The base
        # exception class is spelled `Error`; the module has no
        # lowercase `error`, so naming that here would turn a parse
        # failure into an AttributeError.
        return None

    if dut in parser.sections():
        return 'video_glitch_detection'
    return None
2612
Katherine Threlkeld7b97a9f2014-06-24 13:47:14 -07002613 @label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    Each device type is reported when
    `/opt/google/input/inputcontrol --names -t <type>` prints
    anything.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
             depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device_type
            for device_type in ('touchpad', 'touchscreen')
            if self.run(input_cmd % device_type).stdout]
2627
2628
mussa584b4462014-06-20 15:13:28 -07002629
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in _LABEL_FUNCTIONS is called with this host.
    A function that raises is logged and skipped.  A `str` result adds
    one label, a `list` result adds all of its items, and any other
    result is dropped.
    """
    collected = []
    for detect in self._LABEL_FUNCTIONS:
        try:
            found = detect(self)
        except Exception as e:
            logging.error('Label function %s failed; ignoring it.',
                          detect.__name__)
            logging.exception(e)
            continue
        if not found:
            continue
        found_type = type(found)
        if found_type is str:
            collected.append(found)
        elif found_type is list:
            collected.extend(found)
    return collected
Dan Shi85276d42014-04-08 22:11:45 -07002651
2652
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    Reads the `removable` flag in sysfs for the device reported by
    `rootdev -s -d`.

    @return: True if the flag of the root device is 1.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    flag_cmd = ('cat /sys/block/%s/removable' %
                os.path.basename(root_device))
    removable_flag = self.run(flag_cmd).stdout.strip()
    return int(removable_flag) == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002662
2663
def read_from_meminfo(self, key):
    """Return a value from /proc/meminfo on the host.

    Greps /proc/meminfo for `key` and extracts the first run of
    digits from the output.

    @param key: meminfo requested

    @return the first number in the matching output, as an int

    """
    grep_output = self.run('grep %s /proc/meminfo' % key).stdout.strip()
    logging.debug('%s', grep_output)
    first_number = re.search(r'\d+', grep_output)
    return int(first_number.group(0))