blob: dbc3a2ebac50aef377497a2e137ac8bb429fb617 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Gabe Blackb72f4fb2015-01-20 16:47:13 -080024from autotest_lib.client.common_lib.cros.graphite import autotest_es
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
Cheng-Yi Chiangf4104ff2014-12-23 19:39:01 +080028from autotest_lib.client.cros.audio import cras_utils
MK Ryu35d661e2014-09-25 17:44:10 -070029from autotest_lib.server import autoserv_parser
30from autotest_lib.server import autotest
31from autotest_lib.server import constants
32from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070033from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070034from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050035from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070036from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070037from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070038from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080039from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070040from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080041from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070042
43
beeps32a63082013-08-22 14:02:29 -070044try:
45 import jsonrpclib
46except ImportError:
47 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070048
Fang Dengd1c2b732013-08-20 12:59:46 -070049
class FactoryImageCheckerException(error.AutoservError):
    """Exception raised when an image is a factory image."""
53
54
def add_label_detector(label_function_list, label_list=None, label=None):
    """Decorator factory used to group functions together into a list.

    The returned decorator appends the decorated function to
    `label_function_list` and returns the function unchanged.  When
    `label` is truthy and `label_list` is not None, `label` is also
    appended to `label_list`.

    @param label_function_list: List of label detecting functions to add
                                the decorated function to.
    @param label_list: List of detectable labels to add detectable labels
                       to. (Default: None)
    @param label: Label string that is detectable by this detection
                  function. (Default: None)

    @returns A decorator that registers the function it is applied to.
    """
    def add_func(func):
        """Register `func` as a detector and hand it back unmodified.

        @param func: The function to be added as a detector.

        @returns `func` itself.
        """
        label_function_list.append(func)
        if label and label_list is not None:
            label_list.append(label)
        return func
    return add_func
73
74
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.
    # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
    #   includes powerwash.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    SHUTDOWN_TIMEOUT = 10
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150
    INSTALL_TIMEOUT = 480
    POWERWASH_BOOT_TIMEOUT = 60

    # REBOOT_TIMEOUT: How long to wait for a reboot.
    #
    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    # Set shutdown timeout to account for the time for restarting the UI.
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT

    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # Raw strings keep the regex escapes (\d) out of Python's own string
    # escape processing; the resulting pattern text is unchanged.
    _RPM_HOSTNAME_REGEX = (r'chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
                           r'-host(\d+)')
    _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
                            "in_illuminance_input",
                            "in_illuminance0_raw",
                            "in_illuminance_raw",
                            "illuminance0_input"]
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    # Decorator that registers a method in _LABEL_FUNCTIONS (and, when a
    # label is supplied, the label in _DETECTABLE_LABELS).
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    _RPM_OUTLET_CHANGED = 'outlet_changed'

    # URL pattern to download firmware image.
    _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
            'CROS', 'firmware_url_pattern', type=str)

    # File that has a list of directories to be collected
    _LOGS_TO_COLLECT_FILE = os.path.join(
            common.client_dir, 'common_lib', 'logs_to_collect')

    # Prefix of logging message w.r.t. crash collection
    _CRASHLOGS_PREFIX = 'collect_crashlogs'

    # Time duration waiting for host up/down check
    _CHECK_HOST_UP_TIMEOUT_SECS = 15

    # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
    # might not be completely done deep through the hardware when the machine
    # is powered down right after the command returns.
    # We should wait for a few seconds to make them done. Fingers crossed.
    _SAFE_WAIT_SECS = 10
193
194
@staticmethod
def check_host(host, timeout=10):
    """
    Check if the given host is a chrome-os host.

    Runs a single shell probe on the host that succeeds only when
    /etc/lsb-release mentions CHROMEOS, no `adb` binary is found by
    `which`, and /etc/lsb-release does not mention moblab.

    @param host: An ssh host representing a device.
    @param timeout: The timeout for the run command.

    @return: True if the host device is chromeos; False if the probe
             exits non-zero or the run fails with AutoservRunError or
             AutoservSSHTimeout.

    """
    try:
        result = host.run(
                'grep -q CHROMEOS /etc/lsb-release && '
                '! which adb >/dev/null 2>&1 && '
                '! grep -q moblab /etc/lsb-release',
                ignore_status=True, timeout=timeout)
    except (error.AutoservRunError, error.AutoservSSHTimeout):
        return False
    return result.exit_status == 0
214
215
@staticmethod
def _extract_arguments(args_dict, key_subset):
    """Extract options from `args_dict` and return a subset result.

    Take the provided dictionary of argument options and return
    a subset that represent standard arguments needed to construct
    a test-assistant object (chameleon or servo) for a host.  The
    intent is to provide standard argument processing from
    run_remote_tests for tests that require a test-assistant board
    to operate.

    @param args_dict Dictionary from which to extract the arguments.
    @param key_subset Tuple of keys to extract from the args_dict, e.g.
      ('servo_host', 'servo_port').

    @returns A new dictionary holding only the entries of `args_dict`
             whose keys appear in `key_subset`; keys missing from
             `args_dict` are silently skipped.
    """
    return dict((key, args_dict[key])
                for key in key_subset if key in args_dict)
236
237
@staticmethod
def get_chameleon_arguments(args_dict):
    """Extract chameleon options from `args_dict` and return the result.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
        host = hosts.create_host(machine, chameleon_args=chameleon_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the chameleon
      arguments.

    @returns A dictionary with the 'chameleon_host' and 'chameleon_port'
             entries of `args_dict` that are present.
    """
    return CrosHost._extract_arguments(
            args_dict, ('chameleon_host', 'chameleon_port'))
254
255
@staticmethod
def get_servo_arguments(args_dict):
    """Extract servo options from `args_dict` and return the result.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.

    @returns A dictionary with the 'servo_host' and 'servo_port'
             entries of `args_dict` that are present.
    """
    return CrosHost._extract_arguments(
            args_dict, ('servo_host', 'servo_port'))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700272
J. Richard Barnette964fba02012-10-24 17:34:29 -0700273
def _initialize(self, hostname, chameleon_args=None, servo_args=None,
                try_lab_servo=False, ssh_verbosity_flag='', ssh_options='',
                *args, **dargs):
    """Initialize superclasses, |self.chameleon|, and |self.servo|.

    This method will attempt to create the test-assistant object
    (chameleon/servo) when it is needed by the test. Check
    the docstring of chameleon_host.create_chameleon_host and
    servo_host.create_servo_host for how this is determined.

    @param hostname: Hostname of the dut.
    @param chameleon_args: A dictionary that contains args for creating
                           a ChameleonHost. See chameleon_host for details.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object. See servo_host for details.
    @param try_lab_servo: Boolean, False indicates that ServoHost should
                          not be created for a device in Cros test lab.
                          See servo_host for details.
    @param ssh_verbosity_flag: String, to pass to the ssh command to control
                               verbosity.
    @param ssh_options: String, other ssh options to pass to the ssh
                        command.
    """
    super(CrosHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options
    # TODO(fdeng): We need to simplify the
    # process of servo and servo_host initialization.
    # crbug.com/298432
    self._servo_host = servo_host.create_servo_host(
            dut=self.hostname, servo_args=servo_args,
            try_lab_servo=try_lab_servo)
    # TODO(waihong): Do the simplification on Chameleon too.
    self._chameleon_host = chameleon_host.create_chameleon_host(
            dut=self.hostname, chameleon_args=chameleon_args)

    # self.servo / self.chameleon are always set, to None when the
    # corresponding host object was not created.
    if self._servo_host is not None:
        self.servo = self._servo_host.get_servo()
    else:
        self.servo = None

    if self._chameleon_host:
        self.chameleon = self._chameleon_host.create_chameleon_board()
    else:
        self.chameleon = None
Fang Deng5d518f42013-08-02 14:04:32 -0700326
327
def get_repair_image_name(self):
    """Generate a image_name from variables in the global config.

    The board comes from the AFE, the version from the AFE's
    'get_stable_version' RPC, and both are substituted into the
    'stable_build_pattern' value of the CROS config section.

    @returns a str of $board-version/$BUILD.

    @raises AutoservError: If the DUT has no board attribute.

    """
    board = self._get_board_from_afe()
    if board is None:
        raise error.AutoservError('DUT has no board attribute, '
                                  'cannot be repaired.')
    stable_version = self._AFE.run('get_stable_version', board=board)
    build_pattern = global_config.global_config.get_config_value(
            'CROS', 'stable_build_pattern')
    return build_pattern % (board, stable_version)
342
343
def _host_in_AFE(self):
    """Check if the host is an object the AFE knows.

    @returns the result of querying the AFE for hosts named
             self.hostname; callers in this class test it for
             truthiness.
    """
    return self._AFE.get_hosts(hostname=self.hostname)
350
351
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    @returns job_repo_url from AFE, or None if the host is unknown to
             the AFE or has no job_repo_url attribute.
    """
    # _host_in_AFE() already returns the AFE query result for this
    # hostname, so reuse it rather than issuing the same query twice.
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700365
366
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the host is not known to the AFE.  Otherwise the
    host is removed from every label on it whose name starts with the
    version prefix, and job_repo_url is reset via update_job_repo_url.
    """
    if not self._host_in_AFE():
        return

    host_list = [self.hostname]
    labels = self._AFE.get_labels(
            name__startswith=ds_constants.VERSION_PREFIX,
            host__hostname=self.hostname)

    for label in labels:
        label.remove_hosts(hosts=host_list)

    self.update_job_repo_url(None, None)
381
382
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    When either argument is falsy the attribute is set to None.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    repo_url = None
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)
    # Read the attribute back to confirm the AFE stored what we sent.
    if self.lookup_job_repo_url() != repo_url:
        raise error.AutoservError('Failed to update job_repo_url with %s, '
                                  'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500400
401
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the host is not known to the AFE.  The version
    label is created in the AFE if no label with that name exists yet.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    labels = self._AFE.get_labels(name=cros_label)
    if labels:
        label = labels[0]
    else:
        label = self._AFE.create_label(name=cros_label)

    label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
423
424
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0.  This method asks
    the devserver embedded in that url to stage the 'autotest_packages'
    artifact for the build, and reports how long the staging took as a
    'verify_job_repo_url' gauge.

    If the host has no job_repo_url, a warning is logged and nothing
    else happens.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Not used by the body of this method.

    NOTE(review): errors from the devserver calls are not caught here
    and presumably propagate to the caller -- confirm the exact
    exception types against dev_server.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
            job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    start_time = time.time()
    ds.stage_artifacts(image_name, ['autotest_packages'])
    stage_time = time.time() - start_time

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = server_utils.ParseBuildName(
                image_name)[:3]
    except server_utils.ParseBuildNameException:
        # Unparseable build names simply get no stats entry.
        pass
    else:
        # Slice out the text between the '//' after the scheme and the
        # last ':' (the port separator) to get the devserver host.
        devserver = devserver_url[
                devserver_url.find('/') + 2:devserver_url.rfind(':')]
        stats_key = {
            'board': board,
            'build_type': build_type,
            'branch': branch,
            # Dots would split the host into separate stats path levels.
            'devserver': devserver.replace('.', '_'),
        }
        autotest_stats.Gauge('verify_job_repo_url').send(
                '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
                stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700486
Scott Zawalskieadbf702013-03-14 09:23:06 -0400487
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    @param update_url: url of the image.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    # TODO(jrbarnette):  Yes, I hate this re.match() test case.
    # It's better than the alternative:  see crbug.com/360944.
    image_name = autoupdater.url_to_image_name(update_url)
    release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
    if not re.match(release_pattern, image_name):
        return False
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False
    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    for folder in folders_to_check:
        touch_path = os.path.join(folder, test_file)
        self.run('touch %s' % touch_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
    # NOTE(review): the command ends with `echo $?`, so the status seen
    # here is presumably that of `echo` rather than `test -f`, which
    # would make the check below ineffective.  Left as-is because the
    # intended pass/fail direction is not clear from this code; confirm
    # before changing.
    check_file_cmd = 'test -f %s; echo $?'
    for folder in folders_to_check:
        test_file_path = os.path.join(folder, test_file)
        result = self.run(check_file_cmd % test_file_path,
                          ignore_status=True)
        if result.exit_status == 1:
            return False
    return True
538
539
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.

    @raises ChromiumOSError: If the build found on the DUT is not the
        one that was meant to be installed.

    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')
    # The two literals are concatenated, so the first must end with a
    # space to keep "previous build" as two words.
    rollback_message = (
            'Build %s failed to boot on %s; system rolled back to '
            'previous build' % (updater.update_version, self.hostname))
    updater.verify_boot_expectations(
            expected_kernel, rollback_message=rollback_message)
    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        raise autoupdater.ChromiumOSError(
                'Failed to update %s to build %s; found build '
                '%s instead' % (self.hostname,
                                updater.update_version,
                                updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800565
566
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       falsy, the repair image name is used instead.
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    if not image_name:
        image_name = self.get_repair_image_name()
    logging.info('Staging build for AU: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    devserver.trigger_download(image_name, synchronous=False)
    return tools.image_url_pattern() % (devserver.url(), image_name)
580
581
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       falsy, the repair image name is used instead.
    @returns the URL the devserver reports for the staged test image
             (the value of get_test_image_url for `image_name`).
    """
    if not image_name:
        image_name = self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    devserver.stage_artifacts(image_name, ['test_image'])
    return devserver.get_test_image_url(image_name)
595
596
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory build on a devserver and return its URL.

    @param image_name: a name like <board>/4262.204.0

    @return: An update URL, eg:
        http://<devserver>/static/canary-channel/
        <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
        None is returned (after logging an error) when `image_name`
        is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return

    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not factory_artifact:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    devserver.stage_artifacts(
            image_name,
            [factory_artifact],
            archive_url=None)

    return tools.factory_image_url_pattern() % (devserver.url(), image_name)
629
630
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False,
                    force_full_update=False):
    """Install the DUT.

    A stateful update is attempted first unless `force_full_update` is
    set. Stateful update does not update the kernel and tends to run much
    faster than a full reimage. If the stateful update is not applied or
    fails, a full update, including kernel update, is applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install). Both are added back after a successful update.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None, the image given in the parser options
            is used; failing that, if repair is True we will install the
            stable image listed in afe_stable_versions table. If the table
            is not setup, global_config value under
            CROS.stable_cros_version will be used instead.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @param force_full_update: If True, do not attempt to run stateful
            update, force a full reimage. If False, try stateful update
            first when the dut is already installed with the same version.
    @raises autoupdater.ChromiumOSError

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        requested_build = self._parser.options.image
        if not requested_build:
            if not repair:
                raise autoupdater.ChromiumOSError(
                        'Update failed. No update URL provided.')
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        elif requested_build.startswith('http://'):
            update_url = requested_build
            logging.debug('update url is retrieved from requested_build'
                          ': %s', update_url)
        else:
            # Try to stage any build that does not start with
            # http:// on the devservers defined in
            # global_config.ini.
            update_url = self._stage_image_for_update(requested_build)
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)

    if repair:
        # In case the system is in a bad state, we always reboot
        # the machine before trying to repair.
        #
        # If Chrome is crashing, the ui-respawn job may reboot
        # the DUT to try and "fix" it.  Guard against that
        # behavior by stopping the 'ui' job.
        #
        # If Chrome failed to start, update-engine won't be running,
        # so restart it by force.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        for command in ('stop ui || true',
                        'stop update-engine; start update-engine'):
            self.run(command)
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()

    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    updated = False
    if not force_full_update:
        try:
            updated = self._try_stateful_update(
                    update_url, force_update, updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as stateful_error:
            # Best effort only: fall through to the full update below.
            logging.exception(stateful_error)
            logging.warning('Failed to stateful update DUT, force to update.')

    # Do a full update if stateful update is not applicable or failed.
    inactive_kernel = None
    if not updated:
        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        for command in ('rm -rf /var/lib/whitelist',
                        'mkdir /var/lib/whitelist',
                        'chmod -w /var/lib/whitelist',
                        'stop update-engine; start update-engine'):
            self.run(command)

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            inactive_priority = updater.get_kernel_priority(inactive_kernel)
            active_priority = updater.get_kernel_priority(active_kernel)
            if inactive_priority < active_priority:
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        installed_image = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(installed_image)

    logging.debug('Cleaning up old autotest directories.')
    try:
        stale_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + stale_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700761
762
def _clear_fw_version_labels(self):
    """Detach every firmware version label from this host in the AFE."""
    fw_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    for fw_label in fw_labels:
        fw_label.remove_hosts(hosts=[self.hostname])
770
771
def _add_fw_version_label(self, build):
    """Attach the firmware version label for `build` to this host.

    The label is created first if it does not exist yet.

    @param build: Build of firmware.

    """
    label_name = provision.fw_version_to_label(build)
    provision.ensure_label_exists(label_name)
    matching_labels = self._AFE.get_labels(name__startswith=label_name)
    matching_labels[0].add_hosts([self.hostname])
782
783
def firmware_install(self, build=None):
    """Install firmware to the DUT through its servo.

    The 'firmware' artifact of `build` is staged on a devserver, the
    firmware tarball is downloaded and unpacked into a temporary
    directory, and the EC and AP (BIOS) images are programmed via servo.
    The DUT is then reset through the servo power state controller.

    The host's fw_version labels are removed just before programming
    starts. After the firmware is updated successfully, a new fw_version
    label for `build` is added to the host.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.
                  If not set, repair mode is assumed and the name from
                  `self.get_repair_image_name()` is used.

    @raises error.TestError if the host does not have a servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else: # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # The dts files are extracted on a best-effort basis: a failure
        # of this command is ignored.
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # The label helper requires the build name; calling it without
        # an argument raised TypeError after the firmware was flashed and
        # left the host without a fw_version label.
        self._add_fw_version_label(build)
    finally:
        # Always remove the temporary directory holding the images.
        tmpd.clean()
850
851
def show_update_engine_log(self):
    """Dump the update engine log by running `cat` on it on the host."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
Dan Shi10e992b2013-08-30 11:02:59 -0700856
857
def _get_board_from_afe(self):
    """Look up this host's board from its labels in the AFE.

    The lookup is delegated to `server_utils.get_board_from_afe()`. It
    looks for a host label of the form "board:<board>" and returns the
    "<board>" part. `None` is returned if there is not a single, unique
    label matching the pattern.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800868
869
def get_build(self):
    """Look up the current build for this Host from the AFE.

    The lookup is delegated to `server_utils.get_build_from_afe()`, which
    inspects this host's labels in the AFE to determine its build.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800879
880
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500881 def _install_repair(self):
882 """Attempt to repair this host using upate-engine.
883
884 If the host is up, try installing the DUT with a stable
Dan Shi6964fa52014-12-18 11:04:27 -0800885 "repair" version of Chrome OS as defined in afe_stable_versions table.
886 If the table is not setup, global_config value under
887 CROS.stable_cros_version will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500888
Scott Zawalski62bacae2013-03-05 10:40:32 -0500889 @raises AutoservRepairMethodNA if the DUT is not reachable.
890 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500891
892 """
893 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500894 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500895 logging.info('Attempting to reimage machine to repair image.')
896 try:
897 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700898 except autoupdater.ChromiumOSError as e:
899 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500900 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500901 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500902
903
Dan Shi2c88eed2013-11-12 10:18:38 -0800904 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800905 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800906
Dan Shi9cc48452013-11-12 12:39:26 -0800907 update-engine may fail due to a bad image. In such case, powerwash
908 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800909
910 @raises AutoservRepairMethodNA if the DUT is not reachable.
911 @raises ChromiumOSError if the install failed for some reason.
912
913 """
914 if not self.is_up():
915 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
916
917 logging.info('Attempting to powerwash the DUT.')
918 self.run('echo "fast safe" > '
919 '/mnt/stateful_partition/factory_install_reset')
920 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
921 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800922 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800923 'reboot.')
924 raise error.AutoservRepairFailure(
925 'DUT failed to boot from powerwash after %d seconds' %
926 self.POWERWASH_BOOT_TIMEOUT)
927
928 logging.info('Powerwash succeeded.')
929 self._install_repair()
930
931
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """
    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    Both the USB boot and the install phase are timed with an
    `autotest_stats.Timer` whose key includes the timeout in use.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.

    """
    boot_stat_key = ('servo_install.usb_boot_timeout_%s'
                     % usb_boot_timeout)
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = autotest_stats.Timer(boot_stat_key)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    install_stat_key = ('servo_install.install_timeout_%s'
                        % install_timeout)
    install_timer = autotest_stats.Timer(install_stat_key)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes', timeout=install_timeout)
    self.run('halt')
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.get_power_state_controller().power_off()
    self.servo.switch_usbkey('off')
    # N.B. The Servo API requires that we use power_on() here
    # for two reasons:
    #  1) After turning on a DUT in recovery mode, you must turn
    #     it off and then on with power_on() once more to
    #     disable recovery mode (this is a Parrot specific
    #     requirement).
    #  2) After power_off(), the only way to turn on is with
    #     power_on() (this is a Storm specific requirement).
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
993
994
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700995 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500996 """Reinstall the DUT utilizing servo and a test image.
997
998 Re-install the OS on the DUT by:
999 1) installing a test image on a USB storage device attached to the Servo
1000 board,
1001 2) booting that image in recovery mode, and then
1002 3) installing the image with chromeos-install.
1003
Scott Zawalski62bacae2013-03-05 10:40:32 -05001004 @raises AutoservRepairMethodNA if the device does not have servo
1005 support.
1006
1007 """
1008 if not self.servo:
1009 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
1010 'DUT has no servo support.')
1011
1012 logging.info('Attempting to recovery servo enabled device with '
1013 'servo_repair_reinstall')
1014
J. Richard Barnettee4af8b92013-05-01 13:16:12 -07001015 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001016 self.servo_install(image_url)
1017
1018
1019 def _servo_repair_power(self):
1020 """Attempt to repair DUT using an attached Servo.
1021
1022 Attempt to power on the DUT via power_long_press.
1023
1024 @raises AutoservRepairMethodNA if the device does not have servo
1025 support.
1026 @raises AutoservRepairFailure if the repair fails for any reason.
1027 """
1028 if not self.servo:
1029 raise error.AutoservRepairMethodNA('Repair Power NA: '
1030 'DUT has no servo support.')
1031
1032 logging.info('Attempting to recover servo enabled device by '
1033 'powering it off and on.')
1034 self.servo.get_power_state_controller().power_off()
1035 self.servo.get_power_state_controller().power_on()
1036 if self.wait_up(self.BOOT_TIMEOUT):
1037 return
1038
1039 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001040
1041
Richard Barnette82c35912012-11-20 10:09:10 -08001042 def _powercycle_to_repair(self):
1043 """Utilize the RPM Infrastructure to bring the host back up.
1044
1045 If the host is not up/repaired after the first powercycle we utilize
1046 auto fallback to the last good install by powercycling and rebooting the
1047 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001048
1049 @raises AutoservRepairMethodNA if the device does not support remote
1050 power.
1051 @raises AutoservRepairFailure if the repair fails for any reason.
1052
Richard Barnette82c35912012-11-20 10:09:10 -08001053 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001054 if not self.has_power():
1055 raise error.AutoservRepairMethodNA('Device does not support power.')
1056
Richard Barnette82c35912012-11-20 10:09:10 -08001057 logging.info('Attempting repair via RPM powercycle.')
1058 failed_cycles = 0
1059 self.power_cycle()
1060 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1061 failed_cycles += 1
1062 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001063 raise error.AutoservRepairFailure(
1064 'Powercycled host %s %d times; device did not come back'
1065 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001066 self.power_cycle()
1067 if failed_cycles == 0:
1068 logging.info('Powercycling was successful first time.')
1069 else:
1070 logging.info('Powercycling was successful after %d failures.',
1071 failed_cycles)
1072
1073
MK Ryu35d661e2014-09-25 17:44:10 -07001074 def _reboot_repair(self):
1075 """SSH to this host and reboot."""
1076 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1077 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1078 logging.info('Attempting repair via SSH reboot.')
1079 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1080
1081
def check_device(self):
    """Check if a device is ssh-able, and if so, clean and verify it.

    Runs `ssh_ping`, `cleanup` and `verify` in that order; the first
    failure stops the sequence.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    for step_name in ('ssh_ping', 'cleanup', 'verify'):
        getattr(self, step_name)()
1095
1096
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    It first repairs the servo host if one is configured for the DUT
    but no servo object is available yet.

    This escalates in order through the following procedures and verifies
    the status using `self.check_device()` after each of them. This is done
    until both the repair and the verifying step succeed.

    Escalation order of repair procedures from less intrusive to
    more intrusive repairs:
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try the auto-update re-install again.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.check_device()`; If that call fails the
    exception it raises is passed back to the caller.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.
    @raises ServoHostRepairTotalFailure if the repair process fails to
            fix the servo host if one is attached to the DUT.
    @raises AutoservSshPermissionDeniedError if it is unable
            to ssh to the servo host due to permission error.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as servo_error:
            logging.error('Could not create a healthy servo: %s',
                          servo_error)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [
            self._reboot_repair,
            self._servo_repair_power,
            self._powercycle_to_repair,
            self._install_repair,
            self._install_repair_with_powerwash,
            self._servo_repair_reinstall,
    ]
    failure_messages = []
    board = self._get_board_from_afe()
    for repair_func in repair_funcs:
        try:
            repair_func()
            self.try_collect_crashlogs()
            self.check_device()
            # The success counters stay inside the try block, so a
            # failure while recording them is handled like a failed
            # repair attempt.
            autotest_stats.Counter(
                    '%s.SUCCEEDED' % repair_func.__name__).increment()
            if board:
                autotest_stats.Counter(
                        '%s.%s.SUCCEEDED' % (repair_func.__name__,
                                             board)).increment()
            return
        except error.AutoservRepairMethodNA as not_applicable:
            autotest_stats.Counter(
                    '%s.RepairNA' % repair_func.__name__).increment()
            if board:
                autotest_stats.Counter(
                        '%s.%s.RepairNA' % (repair_func.__name__,
                                            board)).increment()
            logging.warning('Repair function NA: %s', not_applicable)
            failure_messages.append(str(not_applicable))
        except Exception as repair_error:
            autotest_stats.Counter(
                    '%s.FAILED' % repair_func.__name__).increment()
            if board:
                autotest_stats.Counter(
                        '%s.%s.FAILED' % (repair_func.__name__,
                                          board)).increment()
            logging.warning('Failed to repair device: %s', repair_error)
            failure_messages.append(str(repair_error))

    # Every repair procedure was either not applicable or failed.
    autotest_stats.Counter('Full_Repair_Failed').increment()
    if board:
        autotest_stats.Counter(
                'Full_Repair_Failed.%s' % board).increment()
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failure_messages))
Richard Barnette82c35912012-11-20 10:09:10 -08001196
1197
def try_collect_crashlogs(self, check_host_up=True):
    """
    Collect crash logs from the host if it is flagged as needing them.

    Nothing is collected when the host carries no need_crash_logs value,
    or when `check_host_up` is set and the host is not up. Any exception
    raised on the way is logged and suppressed.

    @param check_host_up: Flag for checking host is up. Default is True.
    """
    try:
        crashed_job = self._need_crash_logs()
        if not crashed_job:
            return
        logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
                      crashed_job)
        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return
        self._collect_crashlogs(crashed_job)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', self._CRASHLOGS_PREFIX,
                      crashed_job)
    except Exception as collect_error:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, collect_error)
1221
1222
def _need_crash_logs(self):
    """Read the need_crash_logs attribute of this host from the AFE.

    At most one such attribute is expected for the host.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    matches = self._AFE.get_host_attribute(
            constants.CRASHLOGS_HOST_ATTRIBUTE, hostname=self.hostname)
    assert len(matches) < 2
    if not matches:
        return None
    return matches[0].value
1233
1234
1235 def _collect_crashlogs(self, job_id):
1236 """Grab logs from the host where a job was crashed.
1237
1238 First, check if PRIOR_LOGS_DIR exists in the host.
1239 If yes, collect them.
1240 Otherwise, check if a lab-machine marker (_LAB_MACHINE_FILE) exists
1241 in the host.
1242 If yes, the host was repaired automatically, and we collect normal
1243 system logs.
1244
1245 @param job_id: Id of the job that was crashed.
1246 """
1247 crashlogs_dir = crashcollect.get_crashinfo_dir(self,
1248 constants.CRASHLOGS_DEST_DIR_PREFIX)
1249 flag_prior_logs = False
1250
1251 if self.path_exists(client_constants.PRIOR_LOGS_DIR):
1252 flag_prior_logs = True
1253 self._collect_prior_logs(crashlogs_dir)
1254 elif self.path_exists(self._LAB_MACHINE_FILE):
1255 self._collect_system_logs(crashlogs_dir)
1256 else:
1257 logging.warning('%s: Host was manually re-installed without '
1258 '--lab_preserve_log option. Skip collecting '
1259 'crash-logs.', self._CRASHLOGS_PREFIX)
1260
1261 # We make crash collection be one-time effort.
1262 # _collect_prior_logs() and _collect_system_logs() will not throw
1263 # any exception, and following codes will be executed even when
1264 # those methods fail.
1265 # _collect_crashlogs() is called only when the host is up (refer
1266 # to try_collect_crashlogs()). We assume _collect_prior_logs() and
1267 # _collect_system_logs() fail rarely when the host is up.
1268 # In addition, it is not clear how many times we should try crash
1269 # collection again while not triggering next repair unnecessarily.
1270 # Threfore, we try crash collection one time.
1271
1272 # Create a marker file as soon as log collection is done.
1273 # Leave the job id to this marker for gs_offloader to consume.
1274 marker_file = os.path.join(crashlogs_dir, constants.CRASHLOGS_MARKER)
1275 with open(marker_file, 'a') as f:
1276 f.write('%s\n' % job_id)
1277
1278 # Remove need_crash_logs attribute
1279 logging.debug('%s: Remove attribute need_crash_logs from host %s',
1280 self._CRASHLOGS_PREFIX, self.hostname)
1281 self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
1282 None, hostname=self.hostname)
1283
1284 if flag_prior_logs:
1285 logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
1286 client_constants.PRIOR_LOGS_DIR, self.hostname)
1287 self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
1288 # Wait for a few seconds to make sure the prior command is
1289 # done deep through storage.
1290 time.sleep(self._SAFE_WAIT_SECS)
1291
1292
1293 def _collect_prior_logs(self, crashlogs_dir):
1294 """Grab prior logs that were stashed before re-installing a host.
1295
1296 @param crashlogs_dir: Directory path where crash-logs are stored.
1297 """
1298 logging.debug('%s: Found %s, collecting them...',
1299 self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR)
1300 try:
1301 self.collect_logs(client_constants.PRIOR_LOGS_DIR,
1302 crashlogs_dir, False)
1303 logging.debug('%s: %s is collected',
1304 self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR)
1305 except Exception as e:
1306 logging.error('%s: Failed to collect %s: %s',
1307 self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR,
1308 e)
1309
1310
1311 def _collect_system_logs(self, crashlogs_dir):
1312 """Grab normal system logs from a host.
1313
1314 @param crashlogs_dir: Directory path where crash-logs are stored.
1315 """
1316 logging.debug('%s: Found %s, collecting system logs...',
1317 self._CRASHLOGS_PREFIX, self._LAB_MACHINE_FILE)
1318 sources = server_utils.parse_simple_config(self._LOGS_TO_COLLECT_FILE)
1319 for src in sources:
1320 try:
1321 if self.path_exists(src):
1322 logging.debug('%s: Collecting %s...',
1323 self._CRASHLOGS_PREFIX, src)
1324 dest = server_utils.concat_path_except_last(
1325 crashlogs_dir, src)
1326 self.collect_logs(src, dest, False)
1327 logging.debug('%s: %s is collected',
1328 self._CRASHLOGS_PREFIX, src)
1329 except Exception as e:
1330 logging.error('%s: Failed to collect %s: %s',
1331 self._CRASHLOGS_PREFIX, src, e)
1332
1333
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001334 def close(self):
beeps32a63082013-08-22 14:02:29 -07001335 self.rpc_disconnect_all()
Fang Deng0ca40e22013-08-27 17:47:44 -07001336 super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001337
1338
Dan Shi49ca0932014-11-14 11:22:27 -08001339 def get_power_supply_info(self):
1340 """Get the output of power_supply_info.
1341
1342 power_supply_info outputs the info of each power supply, e.g.,
1343 Device: Line Power
1344 online: no
1345 type: Mains
1346 voltage (V): 0
1347 current (A): 0
1348 Device: Battery
1349 state: Discharging
1350 percentage: 95.9276
1351 technology: Li-ion
1352
1353 Above output shows two devices, Line Power and Battery, with details of
1354 each device listed. This function parses the output into a dictionary,
1355 with key being the device name, and value being a dictionary of details
1356 of the device info.
1357
1358 @return: The dictionary of power_supply_info, e.g.,
1359 {'Line Power': {'online': 'yes', 'type': 'main'},
1360 'Battery': {'vendor': 'xyz', 'percentage': '100'}}
Dan Shie9b765d2014-12-29 16:59:49 -08001361 @raise error.AutoservRunError if power_supply_info tool is not found in
1362 the DUT. Caller should handle this error to avoid false failure
1363 on verification.
Dan Shi49ca0932014-11-14 11:22:27 -08001364 """
1365 result = self.run('power_supply_info').stdout.strip()
1366 info = {}
1367 device_name = None
1368 device_info = {}
1369 for line in result.split('\n'):
1370 pair = [v.strip() for v in line.split(':')]
1371 if len(pair) != 2:
1372 continue
1373 if pair[0] == 'Device':
1374 if device_name:
1375 info[device_name] = device_info
1376 device_name = pair[1]
1377 device_info = {}
1378 else:
1379 device_info[pair[0]] = pair[1]
1380 if device_name and not device_name in info:
1381 info[device_name] = device_info
1382 return info
1383
1384
1385 def get_battery_percentage(self):
1386 """Get the battery percentage.
1387
1388 @return: The percentage of battery level, value range from 0-100. Return
1389 None if the battery info cannot be retrieved.
1390 """
1391 try:
1392 info = self.get_power_supply_info()
1393 logging.info(info)
1394 return float(info['Battery']['percentage'])
Dan Shie9b765d2014-12-29 16:59:49 -08001395 except (KeyError, ValueError, error.AutoservRunError):
Dan Shi49ca0932014-11-14 11:22:27 -08001396 return None
1397
1398
1399 def is_ac_connected(self):
1400 """Check if the dut has power adapter connected and charging.
1401
1402 @return: True if power adapter is connected and charging.
1403 """
1404 try:
1405 info = self.get_power_supply_info()
1406 return info['Line Power']['online'] == 'yes'
Dan Shie9b765d2014-12-29 16:59:49 -08001407 except (KeyError, error.AutoservRunError):
1408 return None
Dan Shi49ca0932014-11-14 11:22:27 -08001409
1410
    def _cleanup_poweron(self):
        """Special cleanup method to make sure hosts always get power back.

        Does nothing unless the AFE record for this host carries the
        _RPM_OUTLET_CHANGED attribute.  Otherwise the host is powered on
        and the attribute is cleared.  If powering on fails, the failure
        is logged and posted, and the exception is re-raised only when the
        battery level is known and below 50 percent.

        @raises rpm_client.RemotePowerException: if powering on failed and
                the battery percentage is known and below 50.
        """
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=self.hostname)
        # Nothing to do when the host is unknown to the AFE or its outlet
        # was not flagged as changed.
        if not hosts or not (self._RPM_OUTLET_CHANGED in
                             hosts[0].attributes):
            return
        logging.debug('This host has recently interacted with the RPM'
                      ' Infrastructure. Ensuring power is on.')
        try:
            self.power_on()
            # Power is back: clear the flag so later cleanups skip this.
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        except rpm_client.RemotePowerException:
            logging.error('Failed to turn Power On for this host after '
                          'cleanup through the RPM Infrastructure.')
            autotest_es.post(
                    type_str='RPM_poweron_failure',
                    metadata={'hostname': self.hostname})

            battery_percentage = self.get_battery_percentage()
            # A battery_percentage of None (unknown) or 0 is falsy and so
            # does not trigger the re-raise below.
            if battery_percentage and battery_percentage < 50:
                # Bare raise: re-raises the RemotePowerException being
                # handled.
                raise
            elif self.is_ac_connected():
                logging.info('The device has power adapter connected and '
                             'charging. No need to try to turn RPM on '
                             'again.')
                afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                       hostname=self.hostname)
            # Reached whenever the exception was not re-raised above.
            logging.info('Battery level is now at %s%%. The device may '
                         'still have enough power to run test, so no '
                         'exception will be raised.', battery_percentage)
1443
Simran Basi5e6339a2013-03-21 11:34:32 -07001444
beepsc87ff602013-07-31 21:53:00 -07001445 def _is_factory_image(self):
1446 """Checks if the image on the DUT is a factory image.
1447
1448 @return: True if the image on the DUT is a factory image.
1449 False otherwise.
1450 """
1451 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1452 return result.exit_status == 0
1453
1454
1455 def _restart_ui(self):
J. Richard Barnette84890bd2014-02-21 11:05:47 -08001456 """Restart the Chrome UI.
beepsc87ff602013-07-31 21:53:00 -07001457
1458 @raises: FactoryImageCheckerException for factory images, since
1459 we cannot attempt to restart ui on them.
1460 error.AutoservRunError for any other type of error that
1461 occurs while restarting ui.
1462 """
1463 if self._is_factory_image():
1464 raise FactoryImageCheckerException('Cannot restart ui on factory '
1465 'images')
1466
J. Richard Barnette84890bd2014-02-21 11:05:47 -08001467 # TODO(jrbarnette): The command to stop/start the ui job
1468 # should live inside cros_ui, too. However that would seem
1469 # to imply interface changes to the existing start()/restart()
1470 # functions, which is a bridge too far (for now).
1471 prompt = cros_ui.get_login_prompt_state(self)
1472 self.run('stop ui; start ui')
1473 cros_ui.wait_for_chrome_ready(prompt, self)
beepsc87ff602013-07-31 21:53:00 -07001474
1475
1476 def cleanup(self):
MK Ryu35d661e2014-09-25 17:44:10 -07001477 self.run('rm -f %s' % client_constants.CLEANUP_LOGS_PAUSED_FILE)
Scott Zawalskiddbc31e2012-11-15 11:29:01 -05001478 try:
beepsc87ff602013-07-31 21:53:00 -07001479 self._restart_ui()
1480 except (error.AutotestRunError, error.AutoservRunError,
1481 FactoryImageCheckerException):
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -07001482 logging.warning('Unable to restart ui, rebooting device.')
Scott Zawalskiddbc31e2012-11-15 11:29:01 -05001483 # Since restarting the UI fails fall back to normal Autotest
1484 # cleanup routines, i.e. reboot the machine.
Fang Deng0ca40e22013-08-27 17:47:44 -07001485 super(CrosHost, self).cleanup()
Simran Basi5e6339a2013-03-21 11:34:32 -07001486 # Check if the rpm outlet was manipulated.
Simran Basid5e5e272012-09-24 15:23:59 -07001487 if self.has_power():
Simran Basi5e6339a2013-03-21 11:34:32 -07001488 self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001489
1490
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001491 def reboot(self, **dargs):
1492 """
1493 This function reboots the site host. The more generic
1494 RemoteHost.reboot() performs sync and sleeps for 5
1495 seconds. This is not necessary for Chrome OS devices as the
1496 sync should be finished in a short time during the reboot
1497 command.
1498 """
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +08001499 if 'reboot_cmd' not in dargs:
Doug Anderson7d5aeb22014-02-27 15:12:17 -08001500 reboot_timeout = dargs.get('reboot_timeout', 10)
1501 dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
1502 ' </dev/null >/dev/null 2>&1 &)' %
1503 reboot_timeout)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001504 # Enable fastsync to avoid running extra sync commands before reboot.
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +08001505 if 'fastsync' not in dargs:
1506 dargs['fastsync'] = True
Michael Liangda8c60a2014-06-03 13:24:51 -07001507
Charlie Mooneya8e6dab2014-05-29 14:37:55 -07001508 # For purposes of logging reboot times:
1509 # Get the board name i.e. 'daisy_spring'
Michael Liangca4f5a62014-07-10 15:45:13 -07001510 board_fullname = self.get_board()
1511
1512 # Strip the prefix and add it to dargs.
1513 dargs['board'] = board_fullname[board_fullname.find(':')+1:]
Fang Deng0ca40e22013-08-27 17:47:44 -07001514 super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001515
1516
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07001517 def suspend(self, **dargs):
1518 """
1519 This function suspends the site host.
1520 """
1521 suspend_time = dargs.get('suspend_time', 60)
1522 dargs['timeout'] = suspend_time
1523 if 'suspend_cmd' not in dargs:
1524 cmd = ' && '.join(['echo 0 > /sys/class/rtc/rtc0/wakealarm',
1525 'echo +%d > /sys/class/rtc/rtc0/wakealarm' % suspend_time,
1526 'powerd_dbus_suspend --delay=0 &'])
1527 dargs['suspend_cmd'] = ('(( %s )'
1528 '< /dev/null >/dev/null 2>&1 &)' % cmd)
1529 super(CrosHost, self).suspend(**dargs)
1530
1531
Simran Basiec564392014-08-25 16:48:09 -07001532 def upstart_status(self, service_name):
1533 """Check the status of an upstart init script.
1534
1535 @param service_name: Service to look up.
1536
1537 @returns True if the service is running, False otherwise.
1538 """
1539 return self.run('status %s | grep start/running' %
1540 service_name).stdout.strip() != ''
1541
1542
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001543 def verify_software(self):
Richard Barnetteb2bc13c2013-01-08 17:32:51 -08001544 """Verify working software on a Chrome OS system.
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001545
Richard Barnetteb2bc13c2013-01-08 17:32:51 -08001546 Tests for the following conditions:
1547 1. All conditions tested by the parent version of this
1548 function.
1549 2. Sufficient space in /mnt/stateful_partition.
Fang Deng6b05f5b2013-03-20 13:42:11 -07001550 3. Sufficient space in /mnt/stateful_partition/encrypted.
1551 4. update_engine answers a simple status request over DBus.
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001552
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001553 """
MK Ryu35d661e2014-09-25 17:44:10 -07001554 # Check if a job was crashed on this host.
1555 # If yes, avoid verification until crash-logs are collected.
1556 if self._need_crash_logs():
1557 raise error.AutoservCrashLogCollectRequired(
1558 'Need to collect crash-logs before verification')
1559
Fang Deng0ca40e22013-08-27 17:47:44 -07001560 super(CrosHost, self).verify_software()
J. Richard Barnette4164d1d2014-12-02 17:52:33 -08001561 self.check_inodes(
1562 '/mnt/stateful_partition',
1563 global_config.global_config.get_config_value(
1564 'SERVER', 'kilo_inodes_required', type=int,
1565 default=100))
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001566 self.check_diskspace(
1567 '/mnt/stateful_partition',
1568 global_config.global_config.get_config_value(
Fang Deng6b05f5b2013-03-20 13:42:11 -07001569 'SERVER', 'gb_diskspace_required', type=float,
1570 default=20.0))
Gaurav Shahe448af82014-06-19 15:18:59 -07001571 encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
1572 # Not all targets build with encrypted stateful support.
1573 if self.path_exists(encrypted_stateful_path):
1574 self.check_diskspace(
1575 encrypted_stateful_path,
1576 global_config.global_config.get_config_value(
1577 'SERVER', 'gb_encrypted_diskspace_required', type=float,
1578 default=0.1))
beepsc87ff602013-07-31 21:53:00 -07001579
Simran Basiec564392014-08-25 16:48:09 -07001580 if not self.upstart_status('system-services'):
Prashanth B5d0a0512014-04-25 12:26:08 -07001581 raise error.AutoservError('Chrome failed to reach login. '
1582 'System services not running.')
1583
beepsc87ff602013-07-31 21:53:00 -07001584 # Factory images don't run update engine,
1585 # goofy controls dbus on these DUTs.
1586 if not self._is_factory_image():
1587 self.run('update_engine_client --status')
Scott Zawalskifbca4a92013-03-04 15:56:42 -05001588 # Makes sure python is present, loads and can use built in functions.
1589 # We have seen cases where importing cPickle fails with undefined
1590 # symbols in cPickle.so.
1591 self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001592
1593
Dan Shi49ca0932014-11-14 11:22:27 -08001594 def verify_hardware(self):
1595 """Verify hardware system of a Chrome OS system.
1596
1597 Check following hardware conditions:
1598 1. Battery level.
1599 2. Is power adapter connected.
1600 """
1601 logging.info('Battery percentage: %s', self.get_battery_percentage())
Dan Shie9b765d2014-12-29 16:59:49 -08001602 if self.is_ac_connected() is None:
1603 logging.info('Can not determine if the device has power adapter '
1604 'connected.')
1605 else:
1606 logging.info('Device %s power adapter connected and charging.',
1607 'has' if self.is_ac_connected() else 'does not have')
Dan Shi49ca0932014-11-14 11:22:27 -08001608
1609
Fang Deng96667ca2013-08-01 17:46:18 -07001610 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
1611 connect_timeout=None, alive_interval=None):
1612 """Override default make_ssh_command to use options tuned for Chrome OS.
1613
1614 Tuning changes:
1615 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
1616 connection failure. Consistency with remote_access.sh.
1617
1618 - ServerAliveInterval=180; which causes SSH to ping connection every
1619 180 seconds. In conjunction with ServerAliveCountMax ensures
1620 that if the connection dies, Autotest will bail out quickly.
1621 Originally tried 60 secs, but saw frequent job ABORTS where
1622 the test completed successfully.
1623
1624 - ServerAliveCountMax=3; consistency with remote_access.sh.
1625
1626 - ConnectAttempts=4; reduce flakiness in connection errors;
1627 consistency with remote_access.sh.
1628
1629 - UserKnownHostsFile=/dev/null; we don't care about the keys.
1630 Host keys change with every new installation, don't waste
1631 memory/space saving them.
1632
1633 - SSH protocol forced to 2; needed for ServerAliveInterval.
1634
1635 @param user User name to use for the ssh connection.
1636 @param port Port on the target host to use for ssh connection.
1637 @param opts Additional options to the ssh command.
1638 @param hosts_file Ignored.
1639 @param connect_timeout Ignored.
1640 @param alive_interval Ignored.
1641 """
Aviv Keshetc5947fa2013-09-04 14:06:29 -07001642 base_command = ('/usr/bin/ssh -a -x %s %s %s'
1643 ' -o StrictHostKeyChecking=no'
Fang Deng96667ca2013-08-01 17:46:18 -07001644 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
1645 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
1646 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
1647 ' -o Protocol=2 -l %s -p %d')
Aviv Keshetc5947fa2013-09-04 14:06:29 -07001648 return base_command % (self._ssh_verbosity_flag, self._ssh_options,
1649 opts, user, port)
Fang Deng96667ca2013-08-01 17:46:18 -07001650
1651
beeps32a63082013-08-22 14:02:29 -07001652 def _create_ssh_tunnel(self, port, local_port):
1653 """Create an ssh tunnel from local_port to port.
1654
1655 @param port: remote port on the host.
1656 @param local_port: local forwarding port.
1657
1658 @return: the tunnel process.
1659 """
1660 # Chrome OS on the target closes down most external ports
1661 # for security. We could open the port, but doing that
1662 # would conflict with security tests that check that only
1663 # expected ports are open. So, to get to the port on the
1664 # target we use an ssh tunnel.
1665 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
1666 ssh_cmd = self.make_ssh_command(opts=tunnel_options)
1667 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
1668 logging.debug('Full tunnel command: %s', tunnel_cmd)
1669 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
1670 logging.debug('Started ssh tunnel, local = %d'
1671 ' remote = %d, pid = %d',
1672 local_port, port, tunnel_proc.pid)
1673 return tunnel_proc
1674
1675
Christopher Wileydd181852013-10-10 19:56:58 -07001676 def _setup_rpc(self, port, command_name, remote_pid=None):
beeps32a63082013-08-22 14:02:29 -07001677 """Sets up a tunnel process and performs rpc connection book keeping.
1678
1679 This method assumes that xmlrpc and jsonrpc never conflict, since
1680 we can only either have an xmlrpc or a jsonrpc server listening on
1681 a remote port. As such, it enforces a single proxy->remote port
1682 policy, i.e if one starts a jsonrpc proxy/server from port A->B,
1683 and then tries to start an xmlrpc proxy forwarded to the same port,
1684 the xmlrpc proxy will override the jsonrpc tunnel process, however:
1685
1686 1. None of the methods on the xmlrpc proxy will work because
1687 the server listening on B is jsonrpc.
1688
1689 2. The xmlrpc client cannot initiate a termination of the JsonRPC
1690 server, as the only use case currently is goofy, which is tied to
1691 the factory image. It is much easier to handle a failed xmlrpc
1692 call on the client than it is to terminate goofy in this scenario,
1693 as doing the latter might leave the DUT in a hard to recover state.
1694
1695 With the current implementation newer rpc proxy connections will
1696 terminate the tunnel processes of older rpc connections tunneling
1697 to the same remote port. If methods are invoked on the client
1698 after this has happened they will fail with connection closed errors.
1699
1700 @param port: The remote forwarding port.
1701 @param command_name: The name of the remote process, to terminate
1702 using pkill.
1703
1704 @return A url that we can use to initiate the rpc connection.
1705 """
1706 self.rpc_disconnect(port)
1707 local_port = utils.get_unused_port()
1708 tunnel_proc = self._create_ssh_tunnel(port, local_port)
Christopher Wileydd181852013-10-10 19:56:58 -07001709 self._rpc_proxy_map[port] = (command_name, tunnel_proc, remote_pid)
beeps32a63082013-08-22 14:02:29 -07001710 return self._RPC_PROXY_URL % local_port
1711
1712
    def xmlrpc_connect(self, command, port, command_name=None,
                       ready_test_name=None, timeout_seconds=10,
                       logfile='/dev/null'):
        """Connect to an XMLRPC server on the host.

        The `command` argument should be a simple shell command that
        starts an XMLRPC server on the given `port`.  The command
        must not daemonize, and must terminate cleanly on SIGTERM.
        The command is started in the background on the host, and a
        local XMLRPC client for the server is created and returned
        to the caller.

        Note that the process of creating an XMLRPC client makes no
        attempt to connect to the remote server; the caller is
        responsible for determining whether the server is running
        correctly, and is ready to serve requests.

        Optionally, the caller can pass ready_test_name, a string
        containing the name of a method to call on the proxy.  This
        method should take no parameters and return successfully only
        when the server is ready to process client requests.  When
        ready_test_name is set, xmlrpc_connect will block until the
        proxy is ready, and raise an error if the server isn't
        ready by timeout_seconds.  In that case the connection is
        torn down again via rpc_disconnect() before the error
        propagates.

        If a server is already running on the remote port, this
        method will kill it and disconnect the tunnel process
        associated with the connection before establishing a new one,
        by consulting the rpc_proxy_map in rpc_disconnect.

        @param command Shell command to start the server.
        @param port Port number on which the server is expected to
                    be serving.
        @param command_name String to use as input to `pkill` to
            terminate the XMLRPC server on the host.
        @param ready_test_name String containing the name of a
            method defined on the XMLRPC server.
        @param timeout_seconds Number of seconds to wait
            for the server to become 'ready.'  An error is raised
            if the server is not ready in time.
            NOTE(review): the exception type comes from retry.retry,
            which is not visible here -- confirm.
        @param logfile Logfile to send output when running
            'command' argument.

        @return The xmlrpclib.ServerProxy for the server.
        """
        # Clean up any existing state.  If the caller is willing
        # to believe their server is down, we ought to clean up
        # any tunnels we might have sitting around.
        self.rpc_disconnect(port)
        # Start the server on the host.  Redirection in the command
        # below is necessary, because 'ssh' won't terminate until
        # background child processes close stdin, stdout, and
        # stderr.
        # 'echo $!' prints the pid of the backgrounded server so it can
        # be recorded below.
        remote_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
        logging.debug('Started XMLRPC server on host %s, pid = %s',
                      self.hostname, remote_pid)

        # Tunnel through SSH to be able to reach that remote port.
        rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
        proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

        if ready_test_name is not None:
            # retry.retry logs each attempt; calculate delay_sec to
            # keep log spam to a dull roar.
            @retry.retry((socket.error,
                          xmlrpclib.ProtocolError,
                          httplib.BadStatusLine),
                         timeout_min=timeout_seconds / 60.0,
                         delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
            def ready_test():
                """ Call proxy.ready_test_name(). """
                getattr(proxy, ready_test_name)()
            # Stays False if ready_test() raises, so that the finally
            # clause below tears the connection down again.
            successful = False
            try:
                logging.info('Waiting %d seconds for XMLRPC server '
                             'to start.', timeout_seconds)
                ready_test()
                successful = True
            finally:
                if not successful:
                    logging.error('Failed to start XMLRPC server.')
                    self.rpc_disconnect(port)
            logging.info('XMLRPC server started successfully.')
        return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001797
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001798
Jason Abeleb6f924f2013-11-13 16:01:54 -08001799 def syslog(self, message, tag='autotest'):
1800 """Logs a message to syslog on host.
1801
1802 @param message String message to log into syslog
1803 @param tag String tag prefix for syslog
1804
1805 """
1806 self.run('logger -t "%s" "%s"' % (tag, message))
1807
1808
beeps32a63082013-08-22 14:02:29 -07001809 def jsonrpc_connect(self, port):
1810 """Creates a jsonrpc proxy connection through an ssh tunnel.
1811
1812 This method exists to facilitate communication with goofy (which is
1813 the default system manager on all factory images) and as such, leaves
1814 most of the rpc server sanity checking to the caller. Unlike
1815 xmlrpc_connect, this method does not facilitate the creation of a remote
1816 jsonrpc server, as the only clients of this code are factory tests,
1817 for which the goofy system manager is built in to the image and starts
1818 when the target boots.
1819
1820 One can theoretically create multiple jsonrpc proxies all forwarded
1821 to the same remote port, provided the remote port has an rpc server
1822 listening. However, in doing so we stand the risk of leaking an
1823 existing tunnel process, so we always disconnect any older tunnels
1824 we might have through rpc_disconnect.
1825
1826 @param port: port on the remote host that is serving this proxy.
1827
1828 @return: The client proxy.
1829 """
1830 if not jsonrpclib:
1831 logging.warning('Jsonrpclib could not be imported. Check that '
1832 'site-packages contains jsonrpclib.')
1833 return None
1834
1835 proxy = jsonrpclib.jsonrpc.ServerProxy(self._setup_rpc(port, None))
1836
1837 logging.info('Established a jsonrpc connection through port %s.', port)
1838 return proxy
1839
1840
1841 def rpc_disconnect(self, port):
1842 """Disconnect from an RPC server on the host.
1843
1844 Terminates the remote RPC server previously started for
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001845 the given `port`. Also closes the local ssh tunnel created
1846 for the connection to the host. This function does not
beeps32a63082013-08-22 14:02:29 -07001847 directly alter the state of a previously returned RPC
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001848 client object; however disconnection will cause all
1849 subsequent calls to methods on the object to fail.
1850
1851 This function does nothing if requested to disconnect a port
beeps32a63082013-08-22 14:02:29 -07001852 that was not previously connected via _setup_rpc.
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001853
1854 @param port Port number passed to a previous call to
beeps32a63082013-08-22 14:02:29 -07001855 `_setup_rpc()`.
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001856 """
beeps32a63082013-08-22 14:02:29 -07001857 if port not in self._rpc_proxy_map:
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001858 return
Christopher Wileydd181852013-10-10 19:56:58 -07001859 remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001860 if remote_name:
1861 # We use 'pkill' to find our target process rather than
1862 # a PID, because the host may have rebooted since
1863 # connecting, and we don't want to kill an innocent
1864 # process with the same PID.
1865 #
1866 # 'pkill' helpfully exits with status 1 if no target
1867 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -07001868 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001869 # status.
1870 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
Christopher Wileydd181852013-10-10 19:56:58 -07001871 if remote_pid:
1872 logging.info('Waiting for RPC server "%s" shutdown',
1873 remote_name)
1874 start_time = time.time()
1875 while (time.time() - start_time <
1876 self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
1877 running_processes = self.run(
1878 "pgrep -f '%s'" % remote_name,
1879 ignore_status=True).stdout.split()
1880 if not remote_pid in running_processes:
1881 logging.info('Shut down RPC server.')
1882 break
1883 time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
1884 else:
1885 raise error.TestError('Failed to shutdown RPC server %s' %
1886 remote_name)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001887
1888 if tunnel_proc.poll() is None:
1889 tunnel_proc.terminate()
1890 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
1891 else:
1892 logging.debug('Tunnel pid %d terminated early, status %d',
1893 tunnel_proc.pid, tunnel_proc.returncode)
beeps32a63082013-08-22 14:02:29 -07001894 del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001895
1896
beeps32a63082013-08-22 14:02:29 -07001897 def rpc_disconnect_all(self):
1898 """Disconnect all known RPC proxy ports."""
1899 for port in self._rpc_proxy_map.keys():
1900 self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001901
1902
def poor_mans_rpc(self, fun):
    """
    Builds a shell command that prints a client utils expression.

    The command changes into the `bin` directory below the path returned
    by `autotest.Autotest.get_installed_autodir(self)` and runs a Python
    one-liner that imports `common` and `utils` and prints `utils.<fun>`.
    Nothing is executed here; the caller passes the result to `run()`.

    @param fun expression in the client utils namespace, including the
               call parentheses (e.g. 'get_gpu_family()').
    @return the shell command as a string.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    pieces = [
        'cd %s/bin; ' % autodir,
        'python -c "import common; import utils;',
        'print utils.%s"' % fun,
    ]
    return ''.join(pieces)
1914
1915
def _ping_check_status(self, status):
    """Ping the host a single time and compare the outcome to `status`.

    @param status Truth value the ping outcome is compared against.
    @return True iff `status` agrees with whether `utils.ping()`
            returned 0 (i.e. both True or both False).

    """
    ping_rc = utils.ping(self.hostname, tries=1, deadline=1)
    answered = (ping_rc == 0)
    # XOR is true when the two values differ, so the match is its negation.
    return not (status ^ answered)
1926
def _ping_wait_for_status(self, status, timeout):
    """Poll `ping` until the host shows the requested status.

    Polling stops as soon as `_ping_check_status(status)` succeeds, or
    once `timeout` seconds have passed.  The pause between polls is
    sized so that roughly `_PING_WAIT_COUNT` polls fit into `timeout`,
    capped at about one minute per cycle.

    @param status Status to wait for; polling returns immediately once
                  `ping` of the host yields this status.
    @param timeout Upper bound, in seconds, on the polling time.
    @return True iff the `ping` status matched the requested status
            when this method returned.

    """
    # A single _ping_check_status() call takes about 1 second, which
    # is why 1 is subtracted from the sleep time.
    sleep_secs = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if self._ping_check_status(status):
            return True
        if sleep_secs > 0:
            time.sleep(sleep_secs)

    # The loop may have ended right after a sleep, so the last `ping`
    # result could be stale; check once more before giving up.
    return self._ping_check_status(status)
1957
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable replacement for `wait_up()`: a host
    that answers ping does not necessarily answer ssh.  Use it only
    when the DUT counts as functional even without ssh access.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001973
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    Can serve as a slightly faster alternative to `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001987
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, which
    looks the same as a powered-off system.  To confirm that the unit
    really slept, force a resume and then call
    `test_wait_for_resume()`.

    Intended to be used from a test in a sequence like this:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host sleep;
                         `self.SLEEP_TIMEOUT` is used when None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    timeout = self.SLEEP_TIMEOUT if sleep_timeout is None else sleep_timeout
    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' % timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002018
2019
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned before
    the host went to sleep.  If the boot id is different after the
    host is back, the host rebooted instead of resuming, and
    `TestFail` is raised.

    See @ref test_wait_for_sleep for the expected calling sequence.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host up;
                          `self.RESUME_TIMEOUT` is used when None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    timeout = (self.RESUME_TIMEOUT if resume_timeout is None
               else resume_timeout)

    if not self.wait_up(timeout=timeout):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                timeout)

    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        return
    logging.error('client rebooted (old boot %s, new boot %s)',
                  old_boot_id, new_boot_id)
    raise error.TestFail(
            'client rebooted, but sleep was expected')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002054
2055
def test_wait_for_shutdown(self, shutdown_timeout=None):
    """Wait for the client to shut down.

    "Shut down" is detected by the host no longer answering ping,
    which looks the same as a system that is merely asleep.  To
    confirm that the unit was really down, force a boot and then call
    test_wait_for_boot().

    Intended to be used from a test in a sequence like this:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @param shutdown_timeout time limit in seconds to allow the host
                            down; `self.SHUTDOWN_TIMEOUT` is used when
                            None.
    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    timeout = (self.SHUTDOWN_TIMEOUT if shutdown_timeout is None
               else shutdown_timeout)
    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002085
2086
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned before
    the shutdown.  If the boot id is unchanged once the host is back,
    `TestFail` is raised.  The boot id comparison is skipped when
    `old_boot_id` is not given (or is otherwise falsy).

    See @ref test_wait_for_shutdown for the expected calling sequence.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time (`self.REBOOT_TIMEOUT`).
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    if not old_boot_id:
        # No reference boot id, so there is nothing to compare.
        return

    if self.get_boot_id() == old_boot_id:
        logging.error('client not rebooted (boot %s)',
                      old_boot_id)
        raise error.TestFail(
                'client is back up, but did not reboot')
Simran Basid5e5e272012-09-24 15:23:59 -07002117
2118
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname matches the RPM-powered naming format.

    @param hostname: hostname to check for rpm support.

    @return the regular expression MatchObject from matching `hostname`
            against `CrosHost._RPM_HOSTNAME_REGEX`, or None if the
            hostname does not follow that format.
    """
    rpm_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07002130
2131
def has_power(self):
    """Report whether this host's name follows the RPM naming format.

    @return the result of `CrosHost.check_for_rpm_support()` for this
            host's hostname: a (truthy) regular expression MatchObject
            if the name follows the format, None otherwise.
    """
    name = self.hostname
    return CrosHost.check_for_rpm_support(name)
Simran Basid5e5e272012-09-24 15:23:59 -07002139
2140
def _set_power(self, state, power_method):
    """Sets the power to the host via RPM, Servo or manual.

    @param state Specifies which power state to set to DUT; 'ON' or
                 'OFF' in any letter case.
    @param power_method Specifies which method of power control to
                        use.  `self.POWER_CONTROL_SERVO` and
                        `self.POWER_CONTROL_MANUAL` select servo and
                        manual control; any other value is handled as
                        RPM.

    @exception TestError `state` is neither 'ON' nor 'OFF'.
    @exception TestFail RPM control was requested but `has_power()`
                        reports no RPM for this host.

    """
    valid_states = ['ON', 'OFF']
    wanted = state.upper()

    if wanted not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; just give the operator time to act.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    # Record on the host that its RPM outlet state was changed.
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, wanted, timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002171
2172
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with the state 'OFF'.

    @param power_method Specifies which method of power control to
                        use. Defaults to `POWER_CONTROL_RPM`. Valid
                        values are the strings "RPM", "manual",
                        "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002182
2183
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with the state 'ON'.

    @param power_method Specifies which method of power control to
                        use. Defaults to `POWER_CONTROL_RPM`. Valid
                        values are the strings "RPM", "manual",
                        "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
2193
2194
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is done as `power_off()`,
    a pause of `_POWER_CYCLE_TIMEOUT` seconds, then `power_on()`.
    For any other method a single 'CYCLE' request is sent through
    `rpm_client`.

    @param power_method Specifies which method of power control to
                        use. Defaults to `POWER_CONTROL_RPM`. Valid
                        values are the strings "RPM", "manual",
                        "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002210
2211
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is taken from the firmware id reported by crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally looks like {platform}.{firmware version}, for
    # example Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the part before the
    # first dot is the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.')[0].lower()
    # Newer platforms carry a 'Google_' prefix that older ones lack.
    return platform_id.replace('google_', '')
2225
2226
def get_architecture(self):
    """Determine the correct architecture label for this host.

    @returns the value of `arch()` from an initialized
             `utils.Crossystem` object for this host.
    """
    cros_system = utils.Crossystem(self)
    cros_system.init()
    architecture = cros_system.arch()
    return architecture
2235
2236
def get_chrome_version(self):
    """Gets the Chrome version number and milestone as strings.

    Runs `client_constants.CHROME_VERSION_COMMAND` on the host and
    hands its output to `utils.parse_chrome_version()`.

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
        current Chrome version number as a string (in the form "W.X.Y.Z")
        and "milestone" is the first component of the version number
        (the "W" from "W.X.Y.Z").  If the version number cannot be parsed
        in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
        of "chrome --version" and the milestone will be the empty string.

    """
    result = self.run(client_constants.CHROME_VERSION_COMMAND)
    return utils.parse_chrome_version(result.stdout)
2252
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in
    /etc/lsb-release and prefixed with `ds_constants.BOARD_PREFIX`.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    # Lab devices generally report the plain board name, but development
    # devices report {board_name}-signed-{key_type}.  Names that contain
    # 'x86' (e.g. a leading 'x86-') keep their first two components.
    parts = board.split('-')
    if 'x86' in board:
        kept = parts[0:2]
    else:
        kept = parts[0:1]
    label_format = ds_constants.BOARD_PREFIX + '%s'
    return label_format % '-'.join(kept)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002269
2270
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    Runs the client utils function of the same name on the host through
    the command built by `poor_mans_rpc()`.

    @returns a more detailed string representing the board, prefixed with
             'board_freq_mem:'. Example board strings are
             butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    rpc_cmd = self.poor_mans_rpc('get_board_with_frequency_and_memory()')
    board = self.run(rpc_cmd).stdout
    return 'board_freq_mem:%s' % str.strip(board)
2282
2283
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a light sensor.

    Searches `self._LIGHTSENSOR_SEARCH_DIR` on the host (following
    symlinks, at most 4 levels deep) for any of the names in
    `self._LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    name_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            self._LIGHTSENSOR_SEARCH_DIR, name_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still works correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with 1: none of the possible lightsensor files
        # exist.
        return None
    return 'lightsensor'
2303
2304
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Tests for the directory /sys/class/bluetooth/hci0 on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # `test` exited with 1, i.e. the directory does not exist.
        return None
    # `test` exited with 0.
    return 'bluetooth'
2320
2321
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    Runs the client utils function `get_gpu_family()` on the host through
    the command built by `poor_mans_rpc()`.

    @returns a string representing the gpu family, prefixed with
             'gpu_family:'. Example families are mali, tegra, pinetrail,
             sandybridge, ivybridge, haswell and baytrail.
    """
    rpc_cmd = self.poor_mans_rpc('get_gpu_family()')
    family = self.run(rpc_cmd).stdout
    return 'gpu_family:%s' % str.strip(family)
2332
2333
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    @returns a string representing this host's graphics. For now hosts
    whose `uname -a` output contains 'arm' return graphics:gles while all
    others return graphics:gl. This may change over time, but for
    robustness reasons this should avoid executing code in actual graphics
    libraries (which may not be ready and is tested by
    graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2349
2350
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) return None, since no tests depend on those.
    """
    cmd = 'mosys ec info'
    # For a Chrome OS EC the last field of the output follows our EC
    # version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Other ECs produce output such as:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # Some systems have no EC at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    match = re.search(regexp, ecinfo.stdout)
    if not match:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", match.groups()[0])
    return 'ec:cros'
2388
2389
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or None, if no accelerometers.
    """
    # Each probe is a command that must exit with 0, paired with the
    # message logged when it does not: first that ectool exists, then
    # that the EC supports the motionsense command.
    probes = (
        ('which ectool',
         "No ectool cmd found, assuming no EC accelerometers"),
        ('ectool motionsense',
         "EC does not support motionsense command "
         "assuming no EC accelerometers"),
    )
    for probe_cmd, failure_msg in probes:
        if self.run(probe_cmd, ignore_status=True).exit_status:
            logging.info(failure_msg)
            return None

    # Check that EC motion sensors are active
    first_line = self.run('ectool motionsense active').stdout.split('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2420
2421
@label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if
             `self._chameleon_host` is set, or None if it is not.
    """
    if not self._chameleon_host:
        return None
    port_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', port_label]
2434
2435
@label_decorator('audio_loopback_dongle')
def has_loopback_dongle(self):
    """Determine if an audio loopback dongle is plugged to this host.

    A dongle is reported when the `cras_test_client --dump_s` output
    shows both a HEADPHONE and a MIC node as plugged.

    @returns 'audio_loopback_dongle' when there is an audio loopback dongle
                                     plugged to this host.
             None                    when there is no audio loopback dongle
                                     plugged to this host.
    """
    dump = self.run(command='cras_test_client --dump_s',
                    ignore_status=True)
    server_info = dump.stdout
    both_plugged = all(
            cras_utils.node_type_is_plugged(node_type, server_info)
            for node_type in ('HEADPHONE', 'MIC'))
    if both_plugged:
        return 'audio_loopback_dongle'
    return None
2452
2453
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when `mosys psu type` reports 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # Not every platform ships the psu command for mosys.  When it
        # is missing the device is assumed to have a battery.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else ('power:%s' % psu_type)
2478
2479
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = '/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or something
        # else. We can assume that MMC is always an internal device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Named dev_type so the builtin `type` is not shadowed.
        dev_type = self.run(command=type_cmd, ignore_status=True)
        if dev_type.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = dev_type.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = '/dev/sd[a-z]+'
    # Match against the stripped device path, the same string the MMC
    # check above uses, so surrounding whitespace cannot make the two
    # checks disagree.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        # Any value other than '0' or '1' yields None.
        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2552
2553
@label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    The label is based on whether `self._servo_host` is set.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2564
2565
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect; an
             empty list if running the tool fails.
    """
    try:
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
    except error.AutoservRunError:
        # The tool is not installed.
        return []
    # Raw string, so that \w reaches the regex engine without relying on
    # Python passing through an unrecognized string escape.
    return re.findall(r'^Detected label: (\w+)$', result, re.M)
2582
2583
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """ Determine if a board under test is supported for video glitch
    detection tests.

    A board is supported when client/cros/video/device_spec.conf has a
    section named after the board (the `get_board()` label without
    `ds_constants.BOARD_PREFIX`).

    @return: 'video_glitch_detection' if board is supported, None otherwise.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
            common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # Something went wrong while parsing the conf file.  The base
        # exception class is spelled `Error`; the module has no `error`
        # attribute, so naming that in the handler would itself raise.
        return None

    supported_boards = parser.sections()
    return 'video_glitch_detection' if dut in supported_boards else None
2606
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    A device type counts as present when
    `/opt/google/input/inputcontrol --names -t <type>` prints anything.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
        depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device for device in ('touchpad', 'touchscreen')
            if self.run(input_cmd % device).stdout]
2621
2622
mussa584b4462014-06-20 15:13:28 -07002623
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions
    in `self._LABEL_FUNCTIONS`.

    A label function may return a single string, a list or tuple of
    strings, or a falsy value for "no label".  A function that raises is
    logged and skipped, so one broken detector does not prevent the
    others from running.

    @return a list of label strings.
    """
    # type(u'') is `unicode` on Python 2 and `str` on Python 3, so text
    # labels are accepted either way.
    string_types = (str, type(u''))
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            label = None
        if not label:
            continue
        if isinstance(label, string_types):
            labels.append(label)
        elif isinstance(label, (list, tuple)):
            labels.extend(label)
        else:
            # Make an unusable result visible rather than dropping it
            # silently.
            logging.warning('Label function %s returned unsupported '
                            'value %r; ignoring it.',
                            label_function.__name__, label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002645
2646
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    Asks `rootdev -s -d` for the root device and reads that block
    device's `removable` flag from sysfs.

    @return: True if the root device's removable flag is 1.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    device_name = os.path.basename(root_device)
    flag_output = self.run('cat /sys/block/%s/removable' % device_name)
    return int(flag_output.stdout.strip()) == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002656
2657
def read_from_meminfo(self, key):
    """Return the memory info from /proc/meminfo

    Greps /proc/meminfo on the host for `key` and extracts the first
    run of digits from the output.

    @param key: meminfo requested

    @return the memory value as an int

    """
    grep_result = self.run('grep %s /proc/meminfo' % key)
    meminfo = grep_result.stdout.strip()
    logging.debug('%s', meminfo)
    first_number = re.search(r'\d+', meminfo)
    return int(first_number.group(0))