blob: 8e5e74dc4cccd03be0b16b9c1c3a9377d2c781a8 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080018from autotest_lib.client.common_lib import error
19from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070020from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080021from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080022from autotest_lib.client.common_lib.cros import retry
Michael Liangda8c60a2014-06-03 13:24:51 -070023from autotest_lib.client.common_lib.cros.graphite import stats
Richard Barnette82c35912012-11-20 10:09:10 -080024from autotest_lib.client.cros import constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080025from autotest_lib.client.cros import cros_ui
Simran Basi263a9d32014-08-19 11:16:51 -070026from autotest_lib.server import autoserv_parser, autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070027from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050028from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070029from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Fang Deng96667ca2013-08-01 17:46:18 -070030from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080031from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080033from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070034
35
beeps32a63082013-08-22 14:02:29 -070036try:
37 import jsonrpclib
38except ImportError:
39 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070040
Fang Dengd1c2b732013-08-20 12:59:46 -070041
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
45
46
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label-detecting functions.

    The returned decorator appends the function it decorates to
    `label_function_list` and hands that function back unchanged.  When
    `label` is truthy and `label_list` is not None, `label` is appended
    to `label_list` as well.

    @param label_function_list: List that collects the decorated label
                                detecting functions.
    @param label_list: List that collects detectable labels, or None to
                       skip label bookkeeping.  (Default: None)
    @param label: Label string that is detectable by the decorated
                  function.  (Default: None)

    @returns A decorator that takes and returns the detector function.
    """
    def register(func):
        """Record `func` (and its label, if any) and return it as-is.

        @param func: The function to be added as a detector.
        """
        label_function_list.append(func)
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func

    return register
65
66
Fang Deng0ca40e22013-08-27 17:47:44 -070067class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070068 """Chromium OS specific subclass of Host."""
69
70 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050071 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070072
Richard Barnette03a0c132012-11-05 12:40:35 -080073 # Timeout values (in seconds) associated with various Chrome OS
74 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070075 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -080076 # In general, a good rule of thumb is that the timeout can be up
77 # to twice the typical measured value on the slowest platform.
78 # The times here have not necessarily been empirically tested to
79 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070080 #
81 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -080082 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
83 # time to restart the network.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080084 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070085 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -080086 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -080087 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070088 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -080089 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -080090 # network.
beepsf079cfb2013-09-18 17:49:51 -070091 # INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080092 # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
93 # includes powerwash.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070094
95 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -080096 RESUME_TIMEOUT = 10
Tom Wai-Hong Tam4d169ed2014-02-14 11:05:40 +080097 SHUTDOWN_TIMEOUT = 5
J. Richard Barnettefbcc7122013-07-24 18:24:59 -070098 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070099 USB_BOOT_TIMEOUT = 150
J. Richard Barnette7817b052014-08-28 09:47:29 -0700100 INSTALL_TIMEOUT = 480
Dan Shi2c88eed2013-11-12 10:18:38 -0800101 POWERWASH_BOOT_TIMEOUT = 60
Chris Sosab76e0ee2013-05-22 16:55:41 -0700102
J. Richard Barnette84890bd2014-02-21 11:05:47 -0800103 # REBOOT_TIMEOUT: How long to wait for a reboot.
104 #
Chris Sosab76e0ee2013-05-22 16:55:41 -0700105 # We have a long timeout to ensure we don't flakily fail due to other
106 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
Simran Basi1160e2c2013-10-04 16:00:24 -0700107 # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
108 # return from reboot' bug is solved.
109 REBOOT_TIMEOUT = 480
Chris Sosab76e0ee2013-05-22 16:55:41 -0700110
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800111 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
112 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
113 _USB_POWER_TIMEOUT = 5
114 _POWER_CYCLE_TIMEOUT = 10
115
beeps32a63082013-08-22 14:02:29 -0700116 _RPC_PROXY_URL = 'http://localhost:%d'
Christopher Wileydd181852013-10-10 19:56:58 -0700117 _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
Peter Qiu4410db72014-06-05 10:32:41 -0700118 # Set shutdown timeout to account for the time for restarting the UI.
119 _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800120
Richard Barnette82c35912012-11-20 10:09:10 -0800121 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
122 'rpm_recovery_boards', type=str).split(',')
123
124 _MAX_POWER_CYCLE_ATTEMPTS = 6
125 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
# TODO (sbasi) crbug.com/392548 - re-enable support for chromeos 4 once the
# rpm work is done.
#
# The digit set after 'chromeos' skips 4 on purpose (see the TODO).
# Inside a [...] set '|' is a literal character rather than alternation,
# so the ranges are written back to back; spelling the set '[0-3|5-9]'
# would also accept a '|' in the hostname.
_RPM_HOSTNAME_REGEX = ('chromeos[0-35-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                       'host[0-9]+')
130 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
131 'in_illuminance0_raw',
132 'illuminance0_input']
133 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
134 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800135 _DETECTABLE_LABELS = []
136 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
137 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700138
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800139 # Constants used in ping_wait_up() and ping_wait_down().
140 #
141 # _PING_WAIT_COUNT is the approximate number of polling
142 # cycles to use when waiting for a host state change.
143 #
144 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
145 # for arguments to the internal _ping_wait_for_status()
146 # method.
147 _PING_WAIT_COUNT = 40
148 _PING_STATUS_DOWN = False
149 _PING_STATUS_UP = True
150
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800151 # Allowed values for the power_method argument.
152
153 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
154 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
155 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
156 POWER_CONTROL_RPM = 'RPM'
157 POWER_CONTROL_SERVO = 'servoj10'
158 POWER_CONTROL_MANUAL = 'manual'
159
160 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
161 POWER_CONTROL_SERVO,
162 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800163
Simran Basi5e6339a2013-03-21 11:34:32 -0700164 _RPM_OUTLET_CHANGED = 'outlet_changed'
165
beeps687243d2013-07-18 15:29:27 -0700166
J. Richard Barnette964fba02012-10-24 17:34:29 -0700167 @staticmethod
def check_host(host, timeout=10):
    """Report whether the given host looks like a chrome-os host.

    One chained shell command is run on the host.  It succeeds only
    when /etc/lsb-release mentions CHROMEOS, `which adb` finds nothing,
    and /etc/lsb-release does not mention moblab.

    @param host: An ssh host representing a device.
    @param timeout: The timeout for the run command.

    @return: True if the command exits with status 0.  False if it
             exits with any other status, or if running it raises
             AutoservRunError or AutoservSSHTimeout.

    """
    probe_cmd = ('grep -q CHROMEOS /etc/lsb-release && '
                 '! which adb >/dev/null 2>&1 && '
                 '! grep -q moblab /etc/lsb-release')
    try:
        outcome = host.run(probe_cmd, ignore_status=True, timeout=timeout)
    except (error.AutoservRunError, error.AutoservSSHTimeout):
        return False
    else:
        return outcome.exit_status == 0
186
187
188 @staticmethod
def _extract_arguments(args_dict, key_subset):
    """Pick the entries of `args_dict` whose keys are in `key_subset`.

    The result holds the standard arguments needed to construct a
    test-assistant object (chameleon or servo) for a host, so that
    tests requiring a test-assistant board get uniform argument
    processing from run_remote_tests.  Keys listed in `key_subset` but
    missing from `args_dict` are left out; `args_dict` itself is not
    modified.

    @param args_dict Dictionary from which to extract the arguments.
    @param key_subset Tuple of keys to extract from the args_dict, e.g.
      ('servo_host', 'servo_port').

    @returns A new dictionary with the selected key/value pairs.
    """
    return dict((key, args_dict[key])
                for key in key_subset if key in args_dict)
208
209
210 @staticmethod
def get_chameleon_arguments(args_dict):
    """Return the chameleon options found in `args_dict`.

    Only the 'chameleon_host' and 'chameleon_port' entries are copied
    into the result; either may be absent.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
        host = hosts.create_host(machine, chameleon_args=chameleon_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the chameleon
      arguments.
    """
    chameleon_keys = ('chameleon_host', 'chameleon_port')
    return CrosHost._extract_arguments(args_dict, chameleon_keys)
226
227
228 @staticmethod
def get_servo_arguments(args_dict):
    """Return the servo options found in `args_dict`.

    Only the 'servo_host' and 'servo_port' entries are copied into the
    result; either may be absent.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.
    """
    servo_keys = ('servo_host', 'servo_port')
    return CrosHost._extract_arguments(args_dict, servo_keys)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700244
J. Richard Barnette964fba02012-10-24 17:34:29 -0700245
def _initialize(self, hostname, chameleon_args=None, servo_args=None,
                ssh_verbosity_flag='', ssh_options='',
                *args, **dargs):
    """Initialize superclasses, |self.chameleon|, and |self.servo|.

    After the superclass is initialized, a servo host and a chameleon
    host (the "test-assistant" hosts) are requested from
    servo_host.create_servo_host() and
    chameleon_host.create_chameleon_host().  Those factories decide
    whether an assistant exists for this DUT, based on the DUT hostname
    and on `servo_args` / `chameleon_args`; their rules are not visible
    in this method.  |self.servo| and |self.chameleon| are set from the
    resulting hosts, or to None when no host was produced.

    @param hostname: Name of the DUT; forwarded to the superclass.
    @param chameleon_args: Chameleon settings forwarded to
                           create_chameleon_host(), or None.
    @param servo_args: Servo settings forwarded to create_servo_host(),
                       or None.
    @param ssh_verbosity_flag: Stored as self._ssh_verbosity_flag.
    @param ssh_options: Stored as self._ssh_options.
    @param args: Extra positional arguments for the superclass.
    @param dargs: Extra keyword arguments for the superclass.

    """
    super(CrosHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env is a dictionary of environment variable settings to be
    # exported for commands run on the host.  LIBC_FATAL_STDERR_ can
    # be useful for diagnosing certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options

    # TODO(fdeng): We need to simplify the process of servo and
    # servo_host initialization.  crbug.com/298432
    self._servo_host = servo_host.create_servo_host(
            dut=self.hostname, servo_args=servo_args)
    # TODO(waihong): Do the simplification on Chameleon too.
    self._chameleon_host = chameleon_host.create_chameleon_host(
            dut=self.hostname, chameleon_args=chameleon_args)

    # The servo host is compared against None explicitly ...
    self.servo = (None if self._servo_host is None
                  else self._servo_host.get_servo())
    # ... while the chameleon host is tested for truthiness.
    self.chameleon = (self._chameleon_host.create_chameleon_board()
                      if self._chameleon_host else None)
Fang Deng5d518f42013-08-02 14:04:32 -0700294
295
def get_repair_image_name(self):
    """Generate an image name from variables in the global config.

    The CROS/stable_build_pattern config value is used as a format
    string and filled in with the board (from
    self._get_board_from_afe()) and the CROS/stable_cros_version config
    value, in that order.

    @returns a str of $board-version/$BUILD.

    @raises error.AutoservError if the board lookup returns None.

    """
    config = global_config.global_config
    stable_version = config.get_config_value('CROS', 'stable_cros_version')
    build_pattern = config.get_config_value('CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
311
312
def _host_in_AFE(self):
    """Ask the AFE about this host, looked up by hostname.

    @returns whatever self._AFE.get_hosts() returns for this hostname.
             Callers in this file only test the result for truthiness,
             treating a falsy value as "the AFE does not know this
             host".
    """
    afe = self._AFE
    return afe.get_hosts(hostname=self.hostname)
319
320
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    The AFE is queried exactly once.  The same get_hosts() query used
    to be issued twice in a row (once as an existence check, once to
    read the attributes), doubling the RPC traffic for no benefit.

    @returns job_repo_url from AFE, or None if the AFE returns no host
             for this hostname or the first returned host has no
             job_repo_url attribute.

    """
    hosts = self._AFE.get_hosts(hostname=self.hostname)
    if not hosts:
        return None
    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700334
335
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when self._host_in_AFE() is falsy.  Otherwise this
    host is removed from every label attached to it whose name starts
    with ds_constants.VERSION_PREFIX, and job_repo_url is then reset
    through update_job_repo_url(None, None).
    """
    if self._host_in_AFE():
        hosts_to_detach = [self.hostname]
        version_labels = self._AFE.get_labels(
                name__startswith=ds_constants.VERSION_PREFIX,
                host__hostname=self.hostname)
        for version_label in version_labels:
            version_label.remove_hosts(hosts=hosts_to_detach)
        self.update_job_repo_url(None, None)
350
351
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    The attribute is set to the package url built from `devserver_url`
    and `image_name`, or to None when either of them is falsy.  The
    value is then read back with lookup_job_repo_url() to confirm that
    the write took effect.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None
    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)
    stored_url = self.lookup_job_repo_url()
    if stored_url == repo_url:
        return
    raise error.AutoservError('Failed to update job_repo_url with %s, '
                              'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500369
370
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when self._host_in_AFE() is falsy.  Otherwise the
    label named ds_constants.VERSION_PREFIX + image_name is fetched
    from the AFE (and created if it does not exist yet), this host is
    added to it, and job_repo_url is updated for the devserver that
    dev_server.ImageServer.resolve() picks for the image.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    version_label_name = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    # Resolve the devserver before touching any labels, so a failure
    # here leaves the AFE untouched.
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=version_label_name)
    label = (existing[0] if existing
             else self._AFE.create_label(name=version_label_name))

    label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
392
393
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url
    "http://lmn.cd.ab.xyx:8080/static/lumpy-release/R29-4279.0.0/autotest/packages"
    claims to have the autotest package for lumpy-release/R29-4279.0.0.
    The host's job_repo_url is looked up and split into a devserver url
    and an image name, and that devserver is asked to stage the
    'autotest' artifacts for the image.  The time spent staging is
    reported as a 'verify_job_repo_url' gauge when the image name can
    be parsed into board, build type and branch.

    If the host has no job_repo_url, a warning is logged and nothing
    else happens.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Not used by this method's body.

    NOTE(review): earlier documentation listed DevServerException,
    AutoservError and urllib2.URLError as possible exceptions; this
    method raises nothing itself, so any of those would have to come
    from the devserver helpers it calls -- confirm there.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
            job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    staging_began = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - staging_began

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        name_parts = server_utils.ParseBuildName(image_name)
    except server_utils.ParseBuildNameException:
        # No stats for image names that cannot be parsed.
        return
    board, build_type, branch = name_parts[:3]

    # Take the text between the '//' after the scheme and the last ':'
    # (the port separator) as the devserver host.
    host_start = devserver_url.find('/') + 2
    host_end = devserver_url.rfind(':')
    devserver = devserver_url[host_start:host_end]

    stats_key = dict(board=board,
                     build_type=build_type,
                     branch=branch,
                     devserver=devserver.replace('.', '_'))
    stats.Gauge('verify_job_repo_url').send(
            '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
            stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700455
Scott Zawalskieadbf702013-03-14 09:23:06 -0400456
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling updater.run_update() with update_root=False, as updating
    the kernel is time consuming and not necessary.

    Before the update a marker file is dropped into each folder that
    the stateful update is expected to rebuild; after the reboot the
    update only counts as successful if every marker is gone.

    @param update_url: url of the image.
    @param force_update: Set to True to update the image even if the DUT
                         is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.  False
              if the image is not a release image, if
              updater.check_version() is false, if force_update is
              false, if updater.run_update() reports failure, or if any
              marker file survived the update.

    """
    # TODO(jrbarnette): Yes, I hate this re.match() test case.
    # It's better than the alternative: see crbug.com/360944.
    image_name = autoupdater.url_to_image_name(update_url)
    release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
    if not re.match(release_pattern, image_name):
        return False
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False
    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
    for marker_path in marker_paths:
        # Run `test -f` on its own so that the command's exit status is
        # the result of the test.  (Appending '; echo $?' would make the
        # status always that of `echo`, i.e. 0, and hide the result.)
        # Status 0 means the marker still exists, i.e. the folder was
        # not rebuilt.
        result = self.run('test -f %s' % marker_path, ignore_status=True)
        if result.exit_status == 0:
            logging.warning('Stateful update did not rebuild the folder '
                            'holding %s.', marker_path)
            return False
    return True
507
508
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
                            or `None` to skip rollback checking.

    @raises autoupdater.ChromiumOSError if
            updater.check_version_to_confirm_install() reports that the
            DUT is not running the build we meant to install.

    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')
    # Adjacent string literals are joined with nothing in between, so
    # the first piece must end with a space to keep "previous build"
    # from running together.
    rollback_message = (
            'Build %s failed to boot on %s; system rolled back to previous '
            'build' % (updater.update_version, self.hostname))
    updater.verify_boot_expectations(expected_kernel,
                                     rollback_message=rollback_message)
    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        raise autoupdater.ChromiumOSError(
                'Failed to update %s to build %s; found build '
                '%s instead' % (self.hostname,
                                updater.update_version,
                                updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800534
535
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    A devserver is resolved for the image and asked to download it
    with synchronous=False; the update URL is built from
    tools.image_url_pattern().

    @param image_name: a name like lumpy-release/R27-3837.0.0.  When
                       falsy, the name from get_repair_image_name() is
                       used instead.
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)
    server = dev_server.ImageServer.resolve(build)
    server.trigger_download(build, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (server.url(), build)
549
550
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    A devserver is resolved for the image and asked to stage the
    'test_image' artifact.

    @param image_name: a name like lumpy-release/R27-3837.0.0.  When
                       falsy, the name from get_repair_image_name() is
                       used instead.
    @returns the value of the devserver's get_test_image_url() for the
             image.
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)
    server = dev_server.ImageServer.resolve(build)
    server.stage_artifacts(build, ['test_image'])
    return server.get_test_image_url(build)
564
565
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory image on a devserver and return its URL.

    The artifact to stage is named by the CROS/factory_artifact value
    of the autotest config.

    @param image_name: a name like <board>/4262.204.0

    @return: An update URL built from tools.factory_image_url_pattern(),
             eg: http://<devserver>/static/canary-channel/
             <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
             None (after logging an error) if image_name is falsy.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return

    artifact_name = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if artifact_name:
        logging.info('Staging build for servo install: %s', image_name)
        server = dev_server.ImageServer.resolve(image_name)
        server.stage_artifacts(image_name, [artifact_name],
                               archive_url=None)
        url_pattern = tools.factory_image_url_pattern()
        return url_pattern % (server.url(), image_name)

    raise ValueError('Cannot retrieve the factory artifact name from '
                     'autotest config, and hence cannot stage factory '
                     'artifacts.')
598
599
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).

    When no update_url is passed in, it is resolved in this order:
      1. the image given in `self._parser.options.image` (used directly
         if it starts with 'http://', otherwise staged first);
      2. if `repair` is True, whatever `_stage_image_for_update()`
         stages when called without a build name;
      3. otherwise the install is aborted with ChromiumOSError.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None and repair is True we will install the
            stable image listed in global_config under
            CROS.stable_cros_version.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @raises autoupdater.ChromiumOSError if no update url can be
            determined, or if after a full update the inactive kernel
            has a lower priority than the active kernel.

    """
    # Step 1: figure out which update url to install from.
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        # Repair always installs, even if the version already matches.
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        # Best effort: any failure here is logged and we fall through to
        # the full update below (`updated` is still False).
        logging.exception(e)
        logging.warning('Failed to stateful update DUT, force to update.')

    # Only set by the full-update path; stays None after a stateful update.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('touch /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        # Restore the labels/attribute cleared above, now for the new image.
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    # NOTE(review): reconstructed as running whether or not an update was
    # applied -- confirm the nesting against the original file.
    logging.debug('Cleaning up old autotest directories.')
    try:
        installed_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + installed_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700720
721
def show_update_engine_log(self):
    """Output the update engine log by running `cat` on it on the host."""
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
726
727
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern.

    The lookup itself is delegated to `server_utils.get_board_from_afe`,
    keyed on this host's hostname and `self._AFE`.

    @returns board from label, or `None`.
    """
    return server_utils.get_board_from_afe(self.hostname, self._AFE)
Simran Basi833814b2013-01-29 13:13:43 -0800738
739
def get_build(self):
    """Retrieve the current build for this Host from the AFE.

    Looks through this host's labels in the AFE to determine its build.
    The lookup itself is delegated to `server_utils.get_build_from_afe`,
    keyed on this host's hostname and `self._AFE`.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    return server_utils.get_build_from_afe(self.hostname, self._AFE)
Richard Barnette82c35912012-11-20 10:09:10 -0800749
750
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    If the host is up, try installing the DUT with a stable
    "repair" version of Chrome OS as defined in the global_config
    under CROS.stable_cros_version.  The install is done through
    `machine_install(repair=True)`.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason; the
            failure is logged and then re-raised unchanged.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the traceback, then let the caller see the original error.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500771
772
def _install_repair_with_powerwash(self):
    """Attempt to powerwash first then repair this host using update-engine.

    update-engine may fail due to a bad image. In such case, powerwash
    may help to cleanup the DUT for update-engine to work again.

    The powerwash is requested by writing "fast safe" to
    /mnt/stateful_partition/factory_install_reset and rebooting; once the
    DUT is back up, `_install_repair()` performs the actual reinstall.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises AutoservRepairFailure if the DUT is not up after the
            powerwash reboot.
    @raises ChromiumOSError if the install failed for some reason.

    """
    if not self.is_up():
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to powerwash the DUT.')
    request_powerwash = ('echo "fast safe" > '
                         '/mnt/stateful_partition/factory_install_reset')
    self.run(request_powerwash)
    self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)

    came_back = self.is_up()
    if not came_back:
        logging.error('Powerwash failed. DUT did not come back after '
                      'reboot.')
        raise error.AutoservRepairFailure(
                'DUT failed to boot from powerwash after %d seconds' %
                self.POWERWASH_BOOT_TIMEOUT)

    logging.info('Powerwash succeeded.')
    self._install_repair()
799
800
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """
    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    The USB boot phase and the chromeos-install phase are each timed with
    a `stats.Timer` whose key includes the timeout that was used.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT does not boot from USB within
            usb_boot_timeout.
    @raises AutoservError if the installed image fails to boot.
    """
    # Phase 1: put the image on the USB stick and boot the DUT from it.
    boot_stat_key = ('servo_install.usb_boot_timeout_%s'
                     % usb_boot_timeout)
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = stats.Timer(boot_stat_key)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        # The boot timer is deliberately left running here, exactly as
        # before: stop() is only reached on a successful boot.
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    # Phase 2: install from the USB image onto the DUT.
    install_stat_key = ('servo_install.install_timeout_%s'
                        % install_timeout)
    install_timer = stats.Timer(install_stat_key)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes', timeout=install_timeout)
    install_timer.stop()

    # Phase 3: power cycle into the freshly installed image.
    logging.info('Power cycling DUT through servo.')
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    if not self.wait_up(timeout=self.BOOT_TIMEOUT):
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
854
855
def _servo_repair_reinstall(self):
    """Reinstall the DUT utilizing servo and a test image.

    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    The image url comes from `stage_image_for_servo()` and the install
    itself is done by `servo_install()`.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_url = self.stage_image_for_servo()
    self.servo_install(staged_url)
878
879
def _servo_repair_power(self):
    """Attempt to repair DUT using an attached Servo.

    Powers the DUT off and then on again through the servo's power state
    controller, then waits up to BOOT_TIMEOUT for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT is not up after the power
            cycle.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    # The controller is fetched separately for each operation, as before.
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()

    came_up = self.wait_up(self.BOOT_TIMEOUT)
    if not came_up:
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800901
902
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is powercycled once; each time it then fails to come up
    within BOOT_TIMEOUT it is powercycled again, until it is up or
    `_MAX_POWER_CYCLE_ATTEMPTS` boots have failed.  Repeated powercycling
    relies on the auto fallback to the last good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host is still down after
            `_MAX_POWER_CYCLE_ATTEMPTS` failed boots.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    self.power_cycle()
    while True:
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))
        self.power_cycle()

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
933
934
def check_device(self):
    """Check if a device is ssh-able, and if so, clean and verify it.

    Runs `ssh_ping()`, `cleanup()` and `verify()` in that order; an
    exception from an earlier step prevents the later steps from running.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    # Reachability first: cleanup/verify are pointless on a dead host.
    self.ssh_ping()
    self.cleanup()
    self.verify()
948
949
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    It first repairs the servo host if one is configured but no servo
    object is available yet, and then (re)fetches the servo from it.

    The following procedures are then attempted in order, each followed
    by `self.check_device()`; the first one for which both succeed ends
    the repair:
      1. If there's a servo for the DUT, try to power the DUT off and
         on.
      2. Try to re-install to a known stable image using
         auto-update.
      3. Powerwash the DUT, then try the auto-update re-install again.
      4. If there's a servo for the DUT, try to re-install via
         the servo.
      5. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Every attempt increments a stats counter named after the repair
    function and its outcome (SUCCEEDED, RepairNA or FAILED), plus a
    per-board variant when the board is known.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT; the message lists every collected error.

    """
    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as servo_error:
            # A broken servo must not stop us trying the non-servo repairs.
            logging.error('Could not create a healthy servo: %s',
                          servo_error)
        self.servo = self._servo_host.get_servo()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    strategies = [self._servo_repair_power,
                  self._install_repair,
                  self._install_repair_with_powerwash,
                  self._servo_repair_reinstall,
                  self._powercycle_to_repair]
    failure_messages = []
    board = self._get_board_from_afe()

    def count(key_format, board_key_format, func_name):
        """Increment the overall counter and, if known, the board one."""
        stats.Counter(key_format % func_name).increment()
        if board:
            stats.Counter(board_key_format % (func_name, board)).increment()

    for strategy in strategies:
        strategy_name = strategy.__name__
        try:
            strategy()
            self.check_device()
            # Counting stays inside the try so a stats failure is handled
            # exactly like a failed repair attempt, as before.
            count('%s.SUCCEEDED', '%s.%s.SUCCEEDED', strategy_name)
            return
        except error.AutoservRepairMethodNA as e:
            count('%s.RepairNA', '%s.%s.RepairNA', strategy_name)
            logging.warning('Repair function NA: %s', e)
            failure_messages.append(str(e))
        except Exception as e:
            count('%s.FAILED', '%s.%s.FAILED', strategy_name)
            logging.warning('Failed to repair device: %s', e)
            failure_messages.append(str(e))

    # Nothing worked: record the total failure and report all errors.
    stats.Counter('Full_Repair_Failed').increment()
    if board:
        stats.Counter(
                'Full_Repair_Failed.%s' % board).increment()
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failure_messages))
Richard Barnette82c35912012-11-20 10:09:10 -08001038
1039
def close(self):
    """Disconnect all RPC connections, then run the parent class close()."""
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001043
1044
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE knows this host and the host carries the
    `_RPM_OUTLET_CHANGED` attribute.  Otherwise the host is powered on
    and the attribute is cleared (set to None), even if powering on
    through the RPM Infrastructure failed.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
1063
1064
def _is_factory_image(self):
    """Checks if the image on the DUT is a factory image.

    The check is whether the file /root/.factory_test exists on the DUT.

    @return: True if the image on the DUT is a factory image.
             False otherwise.
    """
    # ignore_status: a missing file is an answer here, not an error.
    probe = self.run('[ -f /root/.factory_test ]', ignore_status=True)
    marker_present = probe.exit_status == 0
    return marker_present
1073
1074
def _restart_ui(self):
    """Restart the Chrome UI.

    The login prompt state is captured before the `ui` job is stopped and
    started, and is then handed to `cros_ui.wait_for_chrome_ready`.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    on_factory_image = self._is_factory_image()
    if on_factory_image:
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette): The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).
    login_prompt_before = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(login_prompt_before, self)
beepsc87ff602013-07-31 21:53:00 -07001094
1095
def cleanup(self):
    """Clean up the DUT between tests.

    Removes the CLEANUP_LOGS_PAUSED_FILE marker and restarts the UI.  If
    the UI cannot be restarted, falls back to the parent class cleanup.
    Finally, for hosts with remote power, makes sure power is back on.
    """
    paused_marker = constants.CLEANUP_LOGS_PAUSED_FILE
    self.run('rm -f %s' % paused_marker)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if not self.has_power():
        return
    self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001109
1110
def reboot(self, **dargs):
    """
    This function reboots the site host. The more generic
    RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    Defaults filled in before delegating to the parent reboot():
      - 'reboot_cmd': a backgrounded `reboot`, followed by `reboot -f`
        after 'reboot_timeout' seconds (default 10).
      - 'fastsync': True.
      - 'board': the value of get_board() with everything up to and
        including the first ':' removed; always set.

    @param dargs: Keyword arguments passed on to the parent reboot().
    """
    if 'reboot_cmd' not in dargs:
        force_after_secs = dargs.get('reboot_timeout', 10)
        dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
                               ' </dev/null >/dev/null 2>&1 &)' %
                               force_after_secs)
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times:
    # Get the board name i.e. 'daisy_spring'
    board_label = self.get_board()

    # Strip the prefix and add it to dargs.  A label without ':' is kept
    # whole, same as slicing from find(':') + 1.
    dargs['board'] = board_label.split(':', 1)[-1]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001135
1136
def suspend(self, **dargs):
    """
    This function suspends the site host.

    'timeout' is always set to the suspend time ('suspend_time', default
    60).  Unless the caller supplies 'suspend_cmd', a command is built
    that resets the rtc0 wake alarm, arms it for suspend_time seconds
    from now and then runs `powerd_dbus_suspend`, all in the background.

    @param dargs: Keyword arguments passed on to the parent suspend().
    """
    suspend_time = dargs.get('suspend_time', 60)
    dargs['timeout'] = suspend_time
    if 'suspend_cmd' not in dargs:
        steps = [
            'echo 0 > /sys/class/rtc/rtc0/wakealarm',
            'echo +%d > /sys/class/rtc/rtc0/wakealarm' % suspend_time,
            'powerd_dbus_suspend --delay=0 &',
        ]
        chained = ' && '.join(steps)
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' % chained)
    super(CrosHost, self).suspend(**dargs)
1150
1151
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    Runs `status <service_name>` on the host and looks for
    "start/running" in its output.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    # grep exits non-zero when nothing matches (service stopped or
    # unknown).  Without ignore_status that exit code makes run() raise
    # instead of letting us return False as documented.
    result = self.run('status %s | grep start/running' % service_name,
                      ignore_status=True)
    return result.stdout.strip() != ''
1161
1162
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted, if that
        path exists.
     4. The system-services upstart job is running.
     5. update_engine answers a simple status request (skipped on
        factory images).
     6. python runs and can import cPickle.

    @raises AutoservError if system-services is not running.
    """
    super(CrosHost, self).verify_software()

    config = global_config.global_config
    stateful_path = '/mnt/stateful_partition'
    self.check_diskspace(
        stateful_path,
        config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float,
            default=20.0))

    # Not all targets build with encrypted stateful support.
    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    if self.path_exists(encrypted_stateful_path):
        self.check_diskspace(
            encrypted_stateful_path,
            config.get_config_value(
                'SERVER', 'gb_encrypted_diskspace_required', type=float,
                default=0.1))

    services_running = self.upstart_status('system-services')
    if not services_running:
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001201
1202
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    The host's `_ssh_verbosity_flag` and `_ssh_options` are placed ahead
    of `opts` in the command.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command line as a string, without the target host.
    """
    template = ''.join((
        '/usr/bin/ssh -a -x %s %s %s',
        ' -o StrictHostKeyChecking=no',
        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes',
        ' -o ConnectTimeout=30 -o ServerAliveInterval=180',
        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4',
        ' -o Protocol=2 -l %s -p %d',
    ))
    substitutions = (self._ssh_verbosity_flag, self._ssh_options,
                     opts, user, port)
    return template % substitutions
Fang Deng96667ca2013-08-01 17:46:18 -07001243
1244
beeps32a63082013-08-22 14:02:29 -07001245 def _create_ssh_tunnel(self, port, local_port):
1246 """Create an ssh tunnel from local_port to port.
1247
1248 @param port: remote port on the host.
1249 @param local_port: local forwarding port.
1250
1251 @return: the tunnel process.
1252 """
1253 # Chrome OS on the target closes down most external ports
1254 # for security. We could open the port, but doing that
1255 # would conflict with security tests that check that only
1256 # expected ports are open. So, to get to the port on the
1257 # target we use an ssh tunnel.
1258 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
1259 ssh_cmd = self.make_ssh_command(opts=tunnel_options)
1260 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
1261 logging.debug('Full tunnel command: %s', tunnel_cmd)
1262 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
1263 logging.debug('Started ssh tunnel, local = %d'
1264 ' remote = %d, pid = %d',
1265 local_port, port, tunnel_proc.pid)
1266 return tunnel_proc
1267
1268
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Open a tunnel to a remote RPC port and record it for cleanup.

    Only one proxy per remote port is tracked.  xmlrpc and jsonrpc
    are assumed never to conflict, since only one kind of server can
    be listening on a given remote port.  Any connection previously
    registered for `port` is disconnected first, so a newer proxy
    terminates the tunnel of an older proxy to the same port; calls
    made afterwards through the older proxy fail with connection
    closed errors.

    Consequently, if a jsonrpc proxy was set up for port B and an
    xmlrpc proxy is then requested for the same port:

    1. None of the methods on the xmlrpc proxy will work, because
       the server listening on B is jsonrpc.

    2. The xmlrpc client cannot terminate the jsonrpc server.  The
       only current jsonrpc use case is goofy, which is tied to the
       factory image; handling a failed xmlrpc call on the client is
       much easier than terminating goofy, which might leave the DUT
       in a hard to recover state.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, or None.
                       Stored with the connection so that
                       rpc_disconnect can wait for that process to
                       exit.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop whatever was registered for this remote port before.
    self.rpc_disconnect(port)

    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    connection_record = (command_name, tunnel, remote_pid)
    self._rpc_proxy_map[port] = connection_record
    return self._RPC_PROXY_URL % forwarded_port
1304
1305
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Start an XMLRPC server on the host and return a client for it.

    `command` should be a simple shell command that starts an XMLRPC
    server on the given `port`.  The command must not daemonize, and
    must terminate cleanly on SIGTERM.  It is started in the
    background on the host, an ssh tunnel to `port` is opened, and a
    local XMLRPC client going through that tunnel is returned.

    Creating the client makes no attempt to contact the server.
    Unless `ready_test_name` is given, the caller is responsible for
    determining whether the server is running correctly and is ready
    to serve requests.

    When `ready_test_name` is given it names a method on the proxy
    that takes no parameters and returns successfully only once the
    server is ready.  That method is then retried for up to
    `timeout_seconds`; if it never succeeds the connection is torn
    down with rpc_disconnect and the failure propagates to the
    caller.

    Any connection previously registered for `port` is disconnected
    first (see rpc_disconnect), which also kills the server that
    connection had started.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
                        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
                           method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
                           for the server to become 'ready.'
    @param logfile Logfile to send output when running
                   'command' argument.

    @return The XMLRPC client proxy.
    """
    # If the caller is willing to believe their server is down, any
    # tunnel still registered for this port is stale; clean it up.
    self.rpc_disconnect(port)

    # The redirections are necessary because 'ssh' won't terminate
    # until background child processes close stdin, stdout, and
    # stderr.  'echo $!' reports the pid of the background command.
    launch_result = self.run(
            '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile))
    remote_pid = launch_result.stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; the delay is scaled with the
        # timeout (clamped to [0.1, 1] seconds) to limit log spam.
        retry_delay_sec = min(max(timeout_seconds / 20.0, 0.1), 1)
        retried_errors = (socket.error,
                          xmlrpclib.ProtocolError,
                          httplib.BadStatusLine)

        @retry.retry(retried_errors,
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=retry_delay_sec)
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            # Runs for every kind of failure; the exception itself
            # keeps propagating after the cleanup.
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)

    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001390
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001391
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    Both arguments are quoted for the shell, so the text reaches
    `logger` verbatim even when it contains quotes, `$`, backticks
    or other shell metacharacters.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    # Imported locally so the block does not depend on a file-level
    # import; the name lives in shlex on Python 3 and pipes on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # '%s' formatting keeps accepting non-string arguments, as before.
    quoted_tag = quote('%s' % (tag,))
    quoted_message = quote('%s' % (message,))
    self.run('logger -t %s %s' % (quoted_tag, quoted_message))
1400
1401
def jsonrpc_connect(self, port):
    """Create a jsonrpc proxy that reaches the host through an ssh tunnel.

    This method exists to talk to goofy, the default system manager
    on all factory images, and leaves most of the rpc server sanity
    checking to the caller.  Unlike xmlrpc_connect it does not start
    a remote server: the only clients of this code are factory
    tests, for which goofy is built in to the image and starts when
    the target boots.

    Several jsonrpc proxies could in theory be forwarded to the same
    remote port, but that risks leaking an existing tunnel process,
    so any older tunnel for `port` is always disconnected first
    (through _setup_rpc, which calls rpc_disconnect).

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name is registered: there is no remote server for
    # rpc_disconnect to kill, only the tunnel.
    make_proxy = jsonrpclib.jsonrpc.ServerProxy
    proxy = make_proxy(self._setup_rpc(port, None))

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1432
1433
def rpc_disconnect(self, port):
    """Tear down the RPC connection registered for `port`.

    Kills the remote RPC server previously started for `port` (when
    a process name was registered for it), waits for it to exit
    (when its pid was registered too), and closes the local ssh
    tunnel created for the connection.  A previously returned RPC
    client object is not touched directly, but every later call made
    through it will fail.

    Does nothing if `port` was not previously connected via
    _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @exception TestError The remote server was still running after
                         _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    try:
        remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
    except KeyError:
        # Nothing was ever registered for this port.
        return

    if remote_name:
        # 'pkill' is used to find the target process rather than a
        # PID, because the host may have rebooted since connecting,
        # and an innocent process with the same PID must not be
        # killed.
        #
        # 'pkill' exits with status 1 if no target process is found,
        # for which run() would throw an exception.  That is not
        # wanted here, so the status is ignored.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)
        if remote_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         remote_name)
            wait_started = time.time()
            while True:
                elapsed = time.time() - wait_started
                if elapsed >= self._RPC_SHUTDOWN_TIMEOUT_SECONDS:
                    raise error.TestError(
                            'Failed to shutdown RPC server %s' %
                            remote_name)
                pgrep_result = self.run("pgrep -f '%s'" % remote_name,
                                        ignore_status=True)
                # remote_pid is compared as a string against the
                # whitespace-separated pids printed by pgrep.
                if remote_pid not in pgrep_result.stdout.split():
                    logging.info('Shut down RPC server.')
                    break
                time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)

    if tunnel_proc.poll() is not None:
        # The ssh process had already exited on its own.
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    else:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001488
1489
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() deletes entries from _rpc_proxy_map, so iterate
    # over a snapshot of the ports.  Iterating the dict (or a Python 3
    # key view) directly would raise "dictionary changed size during
    # iteration".
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001494
1495
def poor_mans_rpc(self, fun):
    """
    Builds a shell script that prints the result of a client utils call.

    Nothing is executed here: the returned script changes to the bin
    directory of the autotest installation on the host and uses
    `python -c` to import utils and print `utils.<fun>`.  The caller
    is expected to run the script on the host and read its output.

    @param fun call expression in the client utils namespace, e.g.
               'get_gpu_family()'.
    @return the shell script, as a string.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    script_parts = (
        'cd %s/bin; ' % autodir,
        'python -c "import common; import utils;',
        'print utils.%s"' % fun,
    )
    return ''.join(script_parts)
1507
1508
def _ping_check_status(self, status):
    """Ping the host a single time and compare against `status`.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    # utils.ping() returning 0 is taken to mean the host answered.
    host_answered = utils.ping(self.hostname, tries=1, deadline=1) == 0
    # XOR is true exactly when the two values disagree.
    if status ^ host_answered:
        return False
    return True
1519
1520 def _ping_wait_for_status(self, status, timeout):
1521 """Wait for the host to have a given status (UP or DOWN).
1522
1523 Status is checked by polling. Polling will not last longer
1524 than the number of seconds in `timeout`. The polling
1525 interval will be long enough that only approximately
1526 _PING_WAIT_COUNT polling cycles will be executed, subject
1527 to a maximum interval of about one minute.
1528
1529 @param status Waiting will stop immediately if `ping` of the
1530 host returns this status.
1531 @param timeout Poll for at most this many seconds.
1532 @return True iff the host status from `ping` matched the
1533 requested status at the time of return.
1534
1535 """
1536 # _ping_check_status() takes about 1 second, hence the
1537 # "- 1" in the formula below.
1538 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1539 end_time = time.time() + timeout
1540 while time.time() <= end_time:
1541 if self._ping_check_status(status):
1542 return True
1543 if poll_interval > 0:
1544 time.sleep(poll_interval)
1545
1546 # The last thing we did was sleep(poll_interval), so it may
1547 # have been too long since the last `ping`. Check one more
1548 # time, just to be sure.
1549 return self._ping_check_status(status)
1550
def ping_wait_up(self, timeout):
    """Block until the host answers `ping`, or `timeout` expires.

    N.B. This method is not a reliable substitute for `wait_up()`,
    because a host that responds to ping will not necessarily
    respond to ssh.  Use it only when the target DUT can be
    considered functional even if it can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001566
def ping_wait_down(self, timeout):
    """Block until the host stops answering `ping`, or `timeout` expires.

    This can be used as a slightly faster version of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001580
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait until the client has entered low-power sleep mode.

    "Asleep" is detected as "no longer answers ping", which cannot
    be distinguished from a system that is powered off.  To confirm
    that the unit was asleep, force a resume and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host sleep.
                         Defaults to SLEEP_TIMEOUT when None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    if sleep_timeout is None:
        time_limit = self.SLEEP_TIMEOUT
    else:
        time_limit = sleep_timeout

    if self.ping_wait_down(timeout=time_limit):
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' % time_limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001611
1612
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait until the client has resumed from low-power sleep mode.

    `old_boot_id` should be the value from `get_boot_id()` obtained
    prior to entering sleep mode.  A `TestFail` exception is raised
    if the boot id has changed, since that means the host rebooted
    instead of resuming.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host up.
                          Defaults to RESUME_TIMEOUT when None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if resume_timeout is None:
        time_limit = self.RESUME_TIMEOUT
    else:
        time_limit = resume_timeout

    if not self.wait_up(timeout=time_limit):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                time_limit)

    # The host is reachable again; make sure it is still the same boot.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, current_boot_id))
1647
1648
def test_wait_for_shutdown(self):
    """Wait until the client has shut down.

    "Has shut down" is detected as "no longer answers ping", which
    cannot be distinguished from a system that is merely asleep.  To
    confirm that the unit was down, force a boot and then call
    test_wait_for_boot().

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time (SHUTDOWN_TIMEOUT seconds).
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001674
1675
def test_wait_for_boot(self, old_boot_id=None):
    """Wait until the client has booted from cold power.

    `old_boot_id` should be the value from `get_boot_id()` obtained
    prior to shutting down.  A `TestFail` exception is raised if the
    boot id has not changed.  The boot id test is omitted if
    `old_boot_id` is not specified.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time (REBOOT_TIMEOUT seconds).
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # Only query the boot id when there is something to compare with.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001705
1706
@staticmethod
def check_for_rpm_support(hostname):
    """Tell whether a hostname follows the naming format of RPM powered DUTs.

    @param hostname: hostname to check for rpm support.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab. If it does follow the format,
            it returns a regular expression MatchObject instead.
    """
    rpm_hostname_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001718
1719
def has_power(self):
    """Tell whether this host's name marks it as powered by an RPM.

    @return The result of check_for_rpm_support() for this host's
            hostname: a regular expression MatchObject (truthy) if
            the hostname follows the naming format defined for RPM
            powered DUTs in the lab, None otherwise.
    """
    hostname = self.hostname
    return CrosHost.check_for_rpm_support(hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001727
1728
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001729 def _set_power(self, state, power_method):
1730 """Sets the power to the host via RPM, Servo or manual.
1731
1732 @param state Specifies which power state to set to DUT
1733 @param power_method Specifies which method of power control to
1734 use. By default "RPM" will be used. Valid values
1735 are the strings "RPM", "manual", "servoj10".
1736
1737 """
1738 ACCEPTABLE_STATES = ['ON', 'OFF']
1739
1740 if state.upper() not in ACCEPTABLE_STATES:
1741 raise error.TestError('State must be one of: %s.'
1742 % (ACCEPTABLE_STATES,))
1743
1744 if power_method == self.POWER_CONTROL_SERVO:
1745 logging.info('Setting servo port J10 to %s', state)
1746 self.servo.set('prtctl3_pwren', state.lower())
1747 time.sleep(self._USB_POWER_TIMEOUT)
1748 elif power_method == self.POWER_CONTROL_MANUAL:
1749 logging.info('You have %d seconds to set the AC power to %s.',
1750 self._POWER_CYCLE_TIMEOUT, state)
1751 time.sleep(self._POWER_CYCLE_TIMEOUT)
1752 else:
1753 if not self.has_power():
1754 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001755 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1756 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1757 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07001758 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07001759
1760
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Switch off the power to this host via RPM, Servo or manual.

    Thin wrapper around _set_power() with the state fixed to 'OFF'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001770
1771
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Switch on the power to this host via RPM, Servo or manual.

    Thin wrapper around _set_power() with the state fixed to 'ON'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1781
1782
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Power cycle this host: turn it OFF, then ON again.

    For the servo and manual methods the cycle is an explicit
    power_off(), a pause of _POWER_CYCLE_TIMEOUT seconds, and a
    power_on().  For any other method a single 'CYCLE' request is
    sent to the RPM.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001798
1799
def get_platform(self):
    """Work out the platform label for this host from its firmware id.

    The fwid generally follows the format
    {platform}.{firmware version}, for example Alex.X.YYY.Z or
    Google_Alex.X.YYY.Z.  The part before the first '.' is taken,
    lower-cased, as the platform id.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    firmware_id = crossystem.fwid()
    leading_part = firmware_id.split('.')[0]
    platform = leading_part.lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform.replace('google_', '')
1813
1814
def get_architecture(self):
    """Work out the architecture label for this host.

    The value is whatever crossystem reports as `arch` on the host.

    @returns a string representing this host's architecture.
    """
    host_crossystem = utils.Crossystem(self)
    host_crossystem.init()
    architecture = host_crossystem.arch()
    return architecture
1823
1824
def get_chrome_version(self):
    """Return the Chrome version number and milestone as strings.

    Runs constants.CHROME_VERSION_COMMAND on the host (which invokes
    "chrome --version") and parses its output with
    utils.parse_chrome_version().

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
        current Chrome version number as a string (in the form "W.X.Y.Z")
        and "milestone" is the first component of the version number
        (the "W" from "W.X.Y.Z").  If the version number cannot be parsed
        in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
        of "chrome --version" and the milestone will be the empty string.

    """
    version_result = self.run(constants.CHROME_VERSION_COMMAND)
    raw_version = version_result.stdout
    return utils.parse_chrome_version(raw_version)
1840
@label_decorator()
def get_board(self):
    """Work out the board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in
    /etc/lsb-release on the host and prefixed with
    ds_constants.BOARD_PREFIX.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}.  The board
    # name may also begin with 'x86-' which we need to keep.
    name_parts = board.split('-')
    if 'x86' in board:
        kept_parts = name_parts[0:2]
    else:
        kept_parts = name_parts[0:1]
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    return board_format_string % '-'.join(kept_parts)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001857
1858
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    The value is produced on the host by the client utils function
    of the same name, run through the script from poor_mans_rpc().

    @returns a more detailed string representing the board, prefixed
             with 'board_freq_mem:'. Examples of the board part are
             butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    script = self.poor_mans_rpc('get_board_with_frequency_and_memory()')
    raw_output = self.run(script).stdout
    # Drop the surrounding whitespace (e.g. the newline from print).
    board = str.strip(raw_output)
    return 'board_freq_mem:%s' % board
1870
1871
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Detect whether this host has a light sensor.

    Searches _LIGHTSENSOR_SEARCH_DIR on the host (following
    symlinks, at most 4 levels deep) for any path matching one of
    the names in _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    wanted_files = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            self._LIGHTSENSOR_SEARCH_DIR, wanted_files)
    try:
        # Run the search cmd following the symlinks. Stderr_tee is set to
        # None as there can be a symlink loop, but the command will still
        # execute correctly with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the possible
        # lightsensor files existed.
        return None
    return 'lightsensor'
1891
1892
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Detect whether this host has bluetooth.

    The check is whether the directory /sys/class/bluetooth/hci0
    exists on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1908
1909
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    The value is produced on the host by the client utils function
    get_gpu_family(), run through the script from poor_mans_rpc().

    @returns a string representing the gpu family, prefixed with
             'gpu_family:'. Examples of the family part are mali, tegra,
             pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    script = self.poor_mans_rpc('get_gpu_family()')
    raw_output = self.run(script).stdout
    # Drop the surrounding whitespace (e.g. the newline from print).
    family = str.strip(raw_output)
    return 'gpu_family:%s' % family
1920
1921
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    # A host counts as ARM when 'arm' appears anywhere in `uname -a`.
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
1937
1938
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    Runs `mosys ec info` and checks whether the last field of its output
    matches the Chrome OS EC version scheme.

    @returns 'ec:cros' when the output matches a Chrome OS EC version.
             Returns None when the command fails (no EC present) or when
             the output does not match (an EC that is not a Chrome OS
             EC), since no tests depend on those.
    """
    cmd = 'mosys ec info'
    # The output should look like these, so that the last field should
    # match our EC version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs will look like these:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # And some systems don't have ECs at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    version_match = re.search(regexp, ecinfo.stdout)
    if not version_match:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", version_match.group(1))
    return 'ec:cros'
1976
1977
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    Probes, in order: that `ectool` is on the path, that
    `ectool motionsense` succeeds, and that the first output line of
    `ectool motionsense active` is not "0".

    @returns 'accel:cros-ec' for accelerometers attached to a Chrome OS
             EC, or None if any of the probes indicates there are none.
    """
    # Prerequisite commands paired with the message logged when the
    # command exits with a non-zero status.
    prerequisites = (
        # Check to make sure we have ectool
        ('which ectool',
         "No ectool cmd found, assuming no EC accelerometers"),
        # Check that the EC supports the motionsense command
        ('ectool motionsense',
         "EC does not support motionsense command "
         "assuming no EC accelerometers"),
    )
    for probe_cmd, failure_msg in prerequisites:
        if self.run(probe_cmd, ignore_status=True).exit_status:
            logging.info(failure_msg)
            return None

    # Check that EC motion sensors are active
    active_output = self.run('ectool motionsense active').stdout
    first_line = active_output.partition('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2008
2009
@label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns the string 'chameleon:' + label, e.g. 'chameleon:hdmi', when
             self._chameleon_host is set; None otherwise.
    """
    if not self._chameleon_host:
        return None
    return 'chameleon:' + self.chameleon.get_label()
2021
2022
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has.

    @returns a string representing this host's power supply:
             'power:battery' when the device has a battery intended for
                     extended use
             'power:AC_primary' when the device has a battery not
                     intended for extended use (for moving the machine,
                     etc)
             'power:AC_only' when the device has no battery at all.
             None when `mosys psu type` reports 'unknown'.
    """
    result = self.run(command='mosys psu type', ignore_status=True)
    if result.exit_status:
        # The psu command for mosys is not included for all platforms.
        # The assumption is that the device will have a battery if the
        # command is not found.
        return 'power:battery'

    psu_type = result.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2047
2048
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = r'/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or
        # something else. We can assume that MMC is always an internal
        # device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Named dev_type rather than `type` so the builtin is not
        # shadowed.
        dev_type = self.run(command=type_cmd, ignore_status=True)
        if dev_type.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = dev_type.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = r'/dev/sd[a-z]+'
    # Match against the stripped device path, like every other check in
    # this method; re.match anchors at the start, so stray leading
    # whitespace in the raw stdout would otherwise defeat the match.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or
        # hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2121
2122
@label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    If the host has a working servo attached, it should have a servo
    label.

    @return: string 'servo' when self._servo_host is set. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2133
2134
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect; an
             empty list if either command run here raises
             AutoservRunError.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
    except error.AutoservRunError:
        # Either `chvt 1` failed or the tool failed to run (for example
        # because it is not installed).
        return []
    # Raw string so that \w is a regex escape, not a string escape.
    return re.findall(r'^Detected label: (\w+)$', result, re.M)
2156
2157
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """Determine if a board under test is supported for video glitch
    detection tests.

    The board name (with ds_constants.BOARD_PREFIX removed) is compared
    against the section names of client/cros/video/device_spec.conf.

    @return: 'video_glitch_detection' if board is supported, None
             otherwise (including when the conf file cannot be parsed).
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
            common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # Something went wrong while parsing the conf file.  The base
        # exception class is spelled `Error`; the module has no `error`
        # attribute, so the old spelling raised AttributeError instead of
        # handling the parse failure.
        return None

    # Some boards have multiple resolutions. e.g: nyan_big has standard
    # and high definitions. The conf file has something like nyan_big_sd,
    # hence the prefix match.
    supported_boards = parser.sections()
    if any(board.startswith(dut) for board in supported_boards):
        return 'video_glitch_detection'
    return None
2187
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    Each device type is queried with
    `/opt/google/input/inputcontrol --names -t <type>`; a type counts as
    present when the command prints anything to stdout.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
             depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device_type for device_type in ['touchpad', 'touchscreen']
            if self.run(input_cmd % device_type).stdout]
2202
2203
mussa584b4462014-06-20 15:13:28 -07002204
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions
    in self._LABEL_FUNCTIONS.

    A label function may return a single string, a list/tuple of strings,
    or a false value (no label).  A label function that raises is logged
    and skipped so one broken probe cannot hide every other label.

    @return: A flat list of all labels produced by the label functions.
    """
    # (str, type(u'')) covers both byte and unicode strings on Python 2
    # and collapses to plain str on Python 3.
    string_types = (str, type(u''))

    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception:
            # Deliberate best-effort: log with traceback and move on.
            logging.exception(
                    'Label function %s failed; ignoring it.',
                    getattr(label_function, '__name__',
                            repr(label_function)))
            continue
        if not label:
            continue
        # isinstance rather than exact type comparison, so subclasses of
        # str/list (and tuples) are not silently dropped.
        if isinstance(label, string_types):
            labels.append(label)
        elif isinstance(label, (list, tuple)):
            labels.extend(label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002226
2227
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    Looks up the root device with `rootdev -s -d` and reads the
    `removable` attribute of that block device under /sys/block.

    @return: True if the removable attribute of the root device is 1.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    block_name = os.path.basename(root_device)
    removable_flag = self.run(
            'cat /sys/block/%s/removable' % block_name).stdout.strip()
    return int(removable_flag) == 1