blob: 2bb9ef136422e843895ee5962818270f2744a99b [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Dan Shi7dca56e2014-11-11 17:07:56 -080024from autotest_lib.client.common_lib.cros.graphite import es_utils
Michael Liangda8c60a2014-06-03 13:24:51 -070025from autotest_lib.client.common_lib.cros.graphite import stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
Cheng-Yi Chiangf4104ff2014-12-23 19:39:01 +080028from autotest_lib.client.cros.audio import cras_utils
MK Ryu35d661e2014-09-25 17:44:10 -070029from autotest_lib.server import autoserv_parser
30from autotest_lib.server import autotest
31from autotest_lib.server import constants
32from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070033from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070034from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050035from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070036from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070037from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070038from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080039from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070040from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080041from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070042
43
beeps32a63082013-08-22 14:02:29 -070044try:
45 import jsonrpclib
46except ImportError:
47 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070048
Fang Dengd1c2b732013-08-20 12:59:46 -070049
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
53
54
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a function as a label detector.

    @param label_function_list: List that every decorated function is
                                appended to.
    @param label_list: Optional list collecting the detectable label
                       strings. (Default: None)
    @param label: Label string the decorated function can detect. It is
                  appended to `label_list` only when it is truthy and a
                  `label_list` was supplied. (Default: None)

    @returns A decorator that records its argument and returns it
             unchanged.
    """
    def register_detector(func):
        """
        @param func: The function to be added as a detector.
        """
        label_function_list.append(func)
        # Nothing to advertise without both a label and a list to hold it.
        if not label or label_list is None:
            return func
        label_list.append(label)
        return func
    return register_detector
73
74
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    # Class-wide handles: the autoserv option parser and an AFE RPC
    # client constructed with timeout_min=5 and delay_sec=10.
    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.
    # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
    #   includes powerwash.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    SHUTDOWN_TIMEOUT = 10
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150
    INSTALL_TIMEOUT = 480
    POWERWASH_BOOT_TIMEOUT = 60

    # REBOOT_TIMEOUT: How long to wait for a reboot.
    #
    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # URL template with a single %d slot.
    # NOTE(review): presumably filled with a local port number - confirm
    # against the code that uses it.
    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    # Set shutdown timeout to account for the time for restarting the UI.
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT

    # Comma-separated CROS.rpm_recovery_boards config value, split into a
    # list when the class body is evaluated.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    _RPM_HOSTNAME_REGEX = ('chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
                           '-host(\d+)')
    _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
                            "in_illuminance_input",
                            "in_illuminance0_raw",
                            "in_illuminance_raw",
                            "illuminance0_input"]
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # Registries filled in through `label_decorator`, which is
    # add_label_detector pre-bound to these two lists.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    _RPM_OUTLET_CHANGED = 'outlet_changed'

    # URL pattern to download firmware image.
    _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
            'CROS', 'firmware_url_pattern', type=str)

    # File that has a list of directories to be collected
    _LOGS_TO_COLLECT_FILE = os.path.join(
            common.client_dir, 'common_lib', 'logs_to_collect')

    # Prefix of logging message w.r.t. crash collection
    _CRASHLOGS_PREFIX = 'collect_crashlogs'

    # Time duration waiting for host up/down check
    _CHECK_HOST_UP_TIMEOUT_SECS = 15

    # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
    # might not be completely done deep through the hardware when the machine
    # is powered down right after the command returns.
    # We should wait for a few seconds to make them done. Fingers crossed.
    _SAFE_WAIT_SECS = 10
193
194
J. Richard Barnette964fba02012-10-24 17:34:29 -0700195 @staticmethod
beeps46dadc92013-11-07 14:07:10 -0800196 def check_host(host, timeout=10):
197 """
198 Check if the given host is a chrome-os host.
199
200 @param host: An ssh host representing a device.
201 @param timeout: The timeout for the run command.
202
203 @return: True if the host device is chromeos.
204
beeps46dadc92013-11-07 14:07:10 -0800205 """
206 try:
Christopher Wiley1ea80942014-02-26 16:45:08 -0800207 result = host.run('grep -q CHROMEOS /etc/lsb-release && '
Simran Basie5f7ae42014-06-26 15:44:06 -0700208 '! which adb >/dev/null 2>&1 && '
209 '! grep -q moblab /etc/lsb-release',
Christopher Wileyfc3eac02013-11-21 16:24:57 -0800210 ignore_status=True, timeout=timeout)
beeps46dadc92013-11-07 14:07:10 -0800211 except (error.AutoservRunError, error.AutoservSSHTimeout):
212 return False
213 return result.exit_status == 0
214
215
216 @staticmethod
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800217 def _extract_arguments(args_dict, key_subset):
218 """Extract options from `args_dict` and return a subset result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800219
220 Take the provided dictionary of argument options and return
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800221 a subset that represent standard arguments needed to construct
222 a test-assistant object (chameleon or servo) for a host. The
223 intent is to provide standard argument processing from
224 run_remote_tests for tests that require a test-assistant board
225 to operate.
226
227 @param args_dict Dictionary from which to extract the arguments.
228 @param key_subset Tuple of keys to extract from the args_dict, e.g.
229 ('servo_host', 'servo_port').
230 """
231 result = {}
232 for arg in key_subset:
233 if arg in args_dict:
234 result[arg] = args_dict[arg]
235 return result
236
237
238 @staticmethod
239 def get_chameleon_arguments(args_dict):
240 """Extract chameleon options from `args_dict` and return the result.
241
242 Recommended usage:
243 ~~~~~~~~
244 args_dict = utils.args_to_dict(args)
245 chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
246 host = hosts.create_host(machine, chameleon_args=chameleon_args)
247 ~~~~~~~~
248
249 @param args_dict Dictionary from which to extract the chameleon
250 arguments.
251 """
252 return CrosHost._extract_arguments(
253 args_dict, ('chameleon_host', 'chameleon_port'))
254
255
256 @staticmethod
257 def get_servo_arguments(args_dict):
258 """Extract servo options from `args_dict` and return the result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800259
260 Recommended usage:
261 ~~~~~~~~
262 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700263 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800264 host = hosts.create_host(machine, servo_args=servo_args)
265 ~~~~~~~~
266
267 @param args_dict Dictionary from which to extract the servo
268 arguments.
269 """
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800270 return CrosHost._extract_arguments(
271 args_dict, ('servo_host', 'servo_port'))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700272
J. Richard Barnette964fba02012-10-24 17:34:29 -0700273
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800274 def _initialize(self, hostname, chameleon_args=None, servo_args=None,
275 ssh_verbosity_flag='', ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700276 *args, **dargs):
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800277 """Initialize superclasses, |self.chameleon|, and |self.servo|.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700278
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800279 This method checks whether a chameleon/servo (aka
280 test-assistant objects) is required by checking whether
281 chameleon_args/servo_args is None. This method will only
282 attempt to create the test-assistant object when it is
283 required by the test.
Fang Deng5d518f42013-08-02 14:04:32 -0700284
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800285 For creating the test-assistant object, there are three
286 possibilities: First, if the host is a lab system known to have
287 a test-assistant board, we connect to that board unconditionally.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700288 Second, if we're called from a control file that requires
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800289 test-assistant features for testing, it will pass settings from
290 the arguments, like `servo_host`, `servo_port`. If neither of
291 these cases apply, the test-assistant object will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700292
293 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700294 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700295 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700296 # self.env is a dictionary of environment variable settings
297 # to be exported for commands run on the host.
298 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
299 # errors that might happen.
300 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700301 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700302 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700303 self._ssh_options = ssh_options
Fang Deng5d518f42013-08-02 14:04:32 -0700304 # TODO(fdeng): We need to simplify the
305 # process of servo and servo_host initialization.
306 # crbug.com/298432
Dan Shi4d478522014-02-14 13:46:32 -0800307 self._servo_host = servo_host.create_servo_host(dut=self.hostname,
308 servo_args=servo_args)
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800309 # TODO(waihong): Do the simplication on Chameleon too.
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800310 self._chameleon_host = chameleon_host.create_chameleon_host(
311 dut=self.hostname, chameleon_args=chameleon_args)
312
Dan Shi4d478522014-02-14 13:46:32 -0800313 if self._servo_host is not None:
314 self.servo = self._servo_host.get_servo()
315 else:
316 self.servo = None
317
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800318 if self._chameleon_host:
Tom Wai-Hong Tameaee3402014-01-22 08:52:10 +0800319 self.chameleon = self._chameleon_host.create_chameleon_board()
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800320 else:
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800321 self.chameleon = None
Fang Deng5d518f42013-08-02 14:04:32 -0700322
323
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500324 def get_repair_image_name(self):
325 """Generate a image_name from variables in the global config.
326
327 @returns a str of $board-version/$BUILD.
328
329 """
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500330 board = self._get_board_from_afe()
331 if board is None:
332 raise error.AutoservError('DUT has no board attribute, '
333 'cannot be repaired.')
Dan Shi6964fa52014-12-18 11:04:27 -0800334 stable_version = self._AFE.run('get_stable_version', board=board)
335 build_pattern = global_config.global_config.get_config_value(
336 'CROS', 'stable_build_pattern')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500337 return build_pattern % (board, stable_version)
338
339
Scott Zawalski62bacae2013-03-05 10:40:32 -0500340 def _host_in_AFE(self):
341 """Check if the host is an object the AFE knows.
342
343 @returns the host object.
344 """
345 return self._AFE.get_hosts(hostname=self.hostname)
346
347
Chris Sosab76e0ee2013-05-22 16:55:41 -0700348 def lookup_job_repo_url(self):
349 """Looks up the job_repo_url for the host.
350
351 @returns job_repo_url from AFE or None if not found.
352
353 @raises KeyError if the host does not have a job_repo_url
354 """
355 if not self._host_in_AFE():
356 return None
357
358 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700359 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
360 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700361
362
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500363 def clear_cros_version_labels_and_job_repo_url(self):
364 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500365 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400366 return
367
Scott Zawalski62bacae2013-03-05 10:40:32 -0500368 host_list = [self.hostname]
369 labels = self._AFE.get_labels(
370 name__startswith=ds_constants.VERSION_PREFIX,
371 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800372
Scott Zawalski62bacae2013-03-05 10:40:32 -0500373 for label in labels:
374 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500375
beepscb6f1e22013-06-28 19:14:10 -0700376 self.update_job_repo_url(None, None)
377
378
379 def update_job_repo_url(self, devserver_url, image_name):
380 """
381 Updates the job_repo_url host attribute and asserts it's value.
382
383 @param devserver_url: The devserver to use in the job_repo_url.
384 @param image_name: The name of the image to use in the job_repo_url.
385
386 @raises AutoservError: If we failed to update the job_repo_url.
387 """
388 repo_url = None
389 if devserver_url and image_name:
390 repo_url = tools.get_package_url(devserver_url, image_name)
391 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500392 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700393 if self.lookup_job_repo_url() != repo_url:
394 raise error.AutoservError('Failed to update job_repo_url with %s, '
395 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500396
397
Dan Shie9309262013-06-19 22:50:21 -0700398 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400399 """Add cros_version labels and host attribute job_repo_url.
400
401 @param image_name: The name of the image e.g.
402 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700403
Scott Zawalskieadbf702013-03-14 09:23:06 -0400404 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500405 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400406 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500407
Scott Zawalskieadbf702013-03-14 09:23:06 -0400408 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700409 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500410
411 labels = self._AFE.get_labels(name=cros_label)
412 if labels:
413 label = labels[0]
414 else:
415 label = self._AFE.create_label(name=cros_label)
416
417 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700418 self.update_job_repo_url(devserver_url, image_name)
419
420
beepsdae65fd2013-07-26 16:24:41 -0700421 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700422 """
423 Make sure job_repo_url of this host is valid.
424
joychen03eaad92013-06-26 09:55:21 -0700425 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700426 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
427 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
428 download and extract it. If the devserver embedded in the url is
429 unresponsive, update the job_repo_url of the host after staging it on
430 another devserver.
431
432 @param job_repo_url: A url pointing to the devserver where the autotest
433 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700434 @param tag: The tag from the server job, in the format
435 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700436
437 @raises DevServerException: If we could not resolve a devserver.
438 @raises AutoservError: If we're unable to save the new job_repo_url as
439 a result of choosing a new devserver because the old one failed to
440 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700441 @raises urllib2.URLError: If the devserver embedded in job_repo_url
442 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700443 """
444 job_repo_url = self.lookup_job_repo_url()
445 if not job_repo_url:
446 logging.warning('No job repo url set on host %s', self.hostname)
447 return
448
449 logging.info('Verifying job repo url %s', job_repo_url)
450 devserver_url, image_name = tools.get_devserver_build_from_package_url(
451 job_repo_url)
452
beeps0c865032013-07-30 11:37:06 -0700453 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700454
455 logging.info('Staging autotest artifacts for %s on devserver %s',
456 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700457
458 start_time = time.time()
Simran Basi25e7a922014-10-31 11:56:10 -0700459 ds.stage_artifacts(image_name, ['autotest_packages'])
beeps687243d2013-07-18 15:29:27 -0700460 stage_time = time.time() - start_time
461
462 # Record how much of the verification time comes from a devserver
463 # restage. If we're doing things right we should not see multiple
464 # devservers for a given board/build/branch path.
465 try:
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800466 board, build_type, branch = server_utils.ParseBuildName(
beeps687243d2013-07-18 15:29:27 -0700467 image_name)[:3]
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800468 except server_utils.ParseBuildNameException:
beeps687243d2013-07-18 15:29:27 -0700469 pass
470 else:
beeps0c865032013-07-30 11:37:06 -0700471 devserver = devserver_url[
Chris Sosa65425082013-10-16 13:26:22 -0700472 devserver_url.find('/') + 2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700473 stats_key = {
474 'board': board,
475 'build_type': build_type,
476 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700477 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700478 }
479 stats.Gauge('verify_job_repo_url').send(
480 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
481 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700482
Scott Zawalskieadbf702013-03-14 09:23:06 -0400483
Dan Shi0f466e82013-02-22 15:44:58 -0800484 def _try_stateful_update(self, update_url, force_update, updater):
485 """Try to use stateful update to initialize DUT.
486
487 When DUT is already running the same version that machine_install
488 tries to install, stateful update is a much faster way to clean up
489 the DUT for testing, compared to a full reimage. It is implemeted
490 by calling autoupdater.run_update, but skipping updating root, as
491 updating the kernel is time consuming and not necessary.
492
493 @param update_url: url of the image.
494 @param force_update: Set to True to update the image even if the DUT
495 is running the same version.
496 @param updater: ChromiumOSUpdater instance used to update the DUT.
497 @returns: True if the DUT was updated with stateful update.
498
499 """
J. Richard Barnette3f731032014-04-07 17:42:59 -0700500 # TODO(jrbarnette): Yes, I hate this re.match() test case.
501 # It's better than the alternative: see crbug.com/360944.
502 image_name = autoupdater.url_to_image_name(update_url)
503 release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
504 if not re.match(release_pattern, image_name):
505 return False
Dan Shi0f466e82013-02-22 15:44:58 -0800506 if not updater.check_version():
507 return False
508 if not force_update:
509 logging.info('Canceling stateful update because the new and '
510 'old versions are the same.')
511 return False
512 # Following folders should be rebuilt after stateful update.
513 # A test file is used to confirm each folder gets rebuilt after
514 # the stateful update.
515 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
516 test_file = '.test_file_to_be_deleted'
517 for folder in folders_to_check:
518 touch_path = os.path.join(folder, test_file)
519 self.run('touch %s' % touch_path)
520
521 if not updater.run_update(force_update=True, update_root=False):
522 return False
523
524 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700525 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800526 check_file_cmd = 'test -f %s; echo $?'
527 for folder in folders_to_check:
528 test_file_path = os.path.join(folder, test_file)
529 result = self.run(check_file_cmd % test_file_path,
530 ignore_status=True)
531 if result.exit_status == 1:
532 return False
533 return True
534
535
J. Richard Barnette7275b612013-06-04 18:13:11 -0700536 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800537 """After the DUT is updated, confirm machine_install succeeded.
538
539 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700540 @param expected_kernel: kernel expected to be active after reboot,
541 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800542
543 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700544 # Touch the lab machine file to leave a marker that
545 # distinguishes this image from other test images.
546 # Afterwards, we must re-run the autoreboot script because
547 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800548 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800549 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700550 updater.verify_boot_expectations(
551 expected_kernel, rollback_message=
552 'Build %s failed to boot on %s; system rolled back to previous'
553 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700554 # Check that we've got the build we meant to install.
555 if not updater.check_version_to_confirm_install():
556 raise autoupdater.ChromiumOSError(
557 'Failed to update %s to build %s; found build '
558 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700559 updater.update_version,
560 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800561
562
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700563 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400564 """Stage a build on a devserver and return the update_url.
565
566 @param image_name: a name like lumpy-release/R27-3837.0.0
567 @returns an update URL like:
568 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
569 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700570 if not image_name:
571 image_name = self.get_repair_image_name()
572 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400573 devserver = dev_server.ImageServer.resolve(image_name)
574 devserver.trigger_download(image_name, synchronous=False)
575 return tools.image_url_pattern() % (devserver.url(), image_name)
576
577
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700578 def stage_image_for_servo(self, image_name=None):
579 """Stage a build on a devserver and return the update_url.
580
581 @param image_name: a name like lumpy-release/R27-3837.0.0
582 @returns an update URL like:
583 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
584 """
585 if not image_name:
586 image_name = self.get_repair_image_name()
587 logging.info('Staging build for servo install: %s', image_name)
588 devserver = dev_server.ImageServer.resolve(image_name)
589 devserver.stage_artifacts(image_name, ['test_image'])
590 return devserver.get_test_image_url(image_name)
591
592
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory image on a devserver and return its URL.

    @param image_name: a name like <board>/4262.204.0

    @return: A URL for the staged factory image, eg:
        http://<devserver>/static/canary-channel/\
        <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
        `None` is returned (after logging an error) when image_name
        is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return None

    # The artifact name is site configuration, read from the CROS section.
    artifact_name = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not artifact_name:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    image_server.stage_artifacts(image_name, [artifact_name],
                                 archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
625
626
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False,
                    force_full_update=False):
    """Install the DUT.

    Unless force_full_update is set, a stateful update is attempted
    first via `_try_stateful_update()`. If that does not report
    success (or raises), a full update is run through the
    autoupdater, the kernel priorities are checked and the DUT is
    rebooted.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install). They are added back only after an update
    succeeded.

    Regardless of whether an update happened, any installed autotest
    directory found on the DUT is removed at the end.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None, the image given on the command line
            (`self._parser.options.image`) is used: as-is when it
            starts with 'http://', otherwise it is staged on a
            devserver first. If no image was requested and repair is
            True we will install the stable image listed in
            afe_stable_versions table. If the table is not setup,
            global_config value under CROS.stable_cros_version will be
            used instead.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like rebooting the DUT and
            restarting update-engine before the update.
            Setting repair to True sets force_update to True as well.
            default: False
    @param force_full_update: If True, do not attempt to run stateful
            update, force a full reimage. If False, try stateful update
            first when the dut is already installed with the same version.
    @raises autoupdater.ChromiumOSError if no update URL can be
            determined, or if after a full update the inactive kernel
            has lower priority than the active one.

    """
    # Step 1: settle on the URL to update from.
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            # No build requested: stage the default repair image.
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    if not force_full_update:
        try:
            updated = self._try_stateful_update(
                    update_url, force_update, updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as e:
            # Best effort only: any failure here falls through to the
            # full update below.
            logging.exception(e)
            logging.warning('Failed to stateful update DUT, force to update.')

    # Stays None unless a full update determines the kernel state.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('mkdir /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        # Restore the labels/attribute cleared above, now for the new build.
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    logging.debug('Cleaning up old autotest directories.')
    try:
        installed_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + installed_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700749
750
def _clear_fw_version_labels(self):
    """Detach every firmware version label from this host in the AFE."""
    fw_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    this_host = self.hostname
    for fw_label in fw_labels:
        fw_label.remove_hosts(hosts=[this_host])
758
759
def _add_fw_version_label(self, build):
    """Attach the firmware version label for `build` to this host.

    The label is created first if it does not exist yet.

    @param build: Build of firmware.

    """
    label_name = provision.fw_version_to_label(build)
    provision.ensure_label_exists(label_name)
    # NOTE(review): the lookup is by prefix and takes the first match;
    # confirm no other label name can start with label_name.
    matching_labels = self._AFE.get_labels(name__startswith=label_name)
    matching_labels[0].add_hosts([self.hostname])
770
771
def firmware_install(self, build=None):
    """Install firmware to the DUT.

    Stages the 'firmware' artifact of the build on a devserver,
    downloads and unpacks the firmware tarball into a temporary
    directory, then uses servo to program first the EC and then the
    BIOS (AP) image, and finally resets the DUT.

    Once the images are unpacked, the host's fw_version labels are
    removed. After the firmware is updated successfully, a new
    fw_version label for `build` will be added to the host.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.
                  If not set, the repair image name from
                  `self.get_repair_image_name()` is used.

    @raises error.TestError if the host has no servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    # The name of the AP image inside the tarball depends on the
    # bootloader the board uses.
    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else: # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # The dts/ directory is optional; a failure to extract it is
        # ignored.
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # _add_fw_version_label() requires the build; calling it without
        # an argument raised TypeError after a successful flash and left
        # the host without any fw_version label.
        self._add_fw_version_label(build)
    finally:
        tmpd.clean()
838
839
def show_update_engine_log(self):
    """Run `cat` on the update engine log of the DUT."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
Dan Shi10e992b2013-08-30 11:02:59 -0700844
845
def _get_board_from_afe(self):
    """Look up this host's board through its labels in the AFE.

    Delegates to `server_utils.get_board_from_afe()`. A board label
    has the form "board:<board>"; the "<board>" part is returned.
    `None` is returned if there is not a single, unique label
    matching the pattern.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800856
857
def get_build(self):
    """Look up the current build of this Host in the AFE.

    Delegates to `server_utils.get_build_from_afe()`, which goes
    through this host's labels in the AFE to determine its build.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800867
868
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500869 def _install_repair(self):
870 """Attempt to repair this host using upate-engine.
871
872 If the host is up, try installing the DUT with a stable
Dan Shi6964fa52014-12-18 11:04:27 -0800873 "repair" version of Chrome OS as defined in afe_stable_versions table.
874 If the table is not setup, global_config value under
875 CROS.stable_cros_version will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500876
Scott Zawalski62bacae2013-03-05 10:40:32 -0500877 @raises AutoservRepairMethodNA if the DUT is not reachable.
878 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500879
880 """
881 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500882 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500883 logging.info('Attempting to reimage machine to repair image.')
884 try:
885 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700886 except autoupdater.ChromiumOSError as e:
887 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500888 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500889 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500890
891
Dan Shi2c88eed2013-11-12 10:18:38 -0800892 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800893 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800894
Dan Shi9cc48452013-11-12 12:39:26 -0800895 update-engine may fail due to a bad image. In such case, powerwash
896 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800897
898 @raises AutoservRepairMethodNA if the DUT is not reachable.
899 @raises ChromiumOSError if the install failed for some reason.
900
901 """
902 if not self.is_up():
903 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
904
905 logging.info('Attempting to powerwash the DUT.')
906 self.run('echo "fast safe" > '
907 '/mnt/stateful_partition/factory_install_reset')
908 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
909 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800910 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800911 'reboot.')
912 raise error.AutoservRepairFailure(
913 'DUT failed to boot from powerwash after %d seconds' %
914 self.POWERWASH_BOOT_TIMEOUT)
915
916 logging.info('Powerwash succeeded.')
917 self._install_repair()
918
919
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """
    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    Both the USB boot and the install phase are timed with a
    `stats.Timer` keyed by the timeout in use. After the install the
    DUT is halted and power cycled through servo with the USB key
    switched off.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT does not boot from USB
            within usb_boot_timeout.
    @raises AutoservError if the installed image fails to boot.

    """
    # Phase 1: put the image on the USB stick and boot from it.
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = stats.Timer(
            'servo_install.usb_boot_timeout_%s' % usb_boot_timeout)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    # Phase 2: install from the USB image onto the DUT, then halt.
    install_timer = stats.Timer(
            'servo_install.install_timeout_%s' % install_timeout)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    install_cmd = ('chromeos-install --yes --lab_preserve_logs=%s' %
                   self._LOGS_TO_COLLECT_FILE)
    self.run(install_cmd, timeout=install_timeout)
    self.run('halt')
    install_timer.stop()

    # Phase 3: power cycle with the USB key off so the DUT boots the
    # freshly installed image.
    logging.info('Power cycling DUT through servo.')
    self.servo.get_power_state_controller().power_off()
    self.servo.switch_usbkey('off')
    # N.B. The Servo API requires that we use power_on() here
    # for two reasons:
    #  1) After turning on a DUT in recovery mode, you must turn
    #     it off and then on with power_on() once more to
    #     disable recovery mode (this is a Parrot specific
    #     requirement).
    #  2) After power_off(), the only way to turn on is with
    #     power_on() (this is a Storm specific requirement).
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    if self.wait_up(timeout=self.BOOT_TIMEOUT):
        return
    raise error.AutoservError('DUT failed to reboot installed '
                              'test image after %d seconds' %
                              self.BOOT_TIMEOUT)
982
983
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700984 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500985 """Reinstall the DUT utilizing servo and a test image.
986
987 Re-install the OS on the DUT by:
988 1) installing a test image on a USB storage device attached to the Servo
989 board,
990 2) booting that image in recovery mode, and then
991 3) installing the image with chromeos-install.
992
Scott Zawalski62bacae2013-03-05 10:40:32 -0500993 @raises AutoservRepairMethodNA if the device does not have servo
994 support.
995
996 """
997 if not self.servo:
998 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
999 'DUT has no servo support.')
1000
1001 logging.info('Attempting to recovery servo enabled device with '
1002 'servo_repair_reinstall')
1003
J. Richard Barnettee4af8b92013-05-01 13:16:12 -07001004 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001005 self.servo_install(image_url)
1006
1007
1008 def _servo_repair_power(self):
1009 """Attempt to repair DUT using an attached Servo.
1010
1011 Attempt to power on the DUT via power_long_press.
1012
1013 @raises AutoservRepairMethodNA if the device does not have servo
1014 support.
1015 @raises AutoservRepairFailure if the repair fails for any reason.
1016 """
1017 if not self.servo:
1018 raise error.AutoservRepairMethodNA('Repair Power NA: '
1019 'DUT has no servo support.')
1020
1021 logging.info('Attempting to recover servo enabled device by '
1022 'powering it off and on.')
1023 self.servo.get_power_state_controller().power_off()
1024 self.servo.get_power_state_controller().power_on()
1025 if self.wait_up(self.BOOT_TIMEOUT):
1026 return
1027
1028 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001029
1030
Richard Barnette82c35912012-11-20 10:09:10 -08001031 def _powercycle_to_repair(self):
1032 """Utilize the RPM Infrastructure to bring the host back up.
1033
1034 If the host is not up/repaired after the first powercycle we utilize
1035 auto fallback to the last good install by powercycling and rebooting the
1036 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001037
1038 @raises AutoservRepairMethodNA if the device does not support remote
1039 power.
1040 @raises AutoservRepairFailure if the repair fails for any reason.
1041
Richard Barnette82c35912012-11-20 10:09:10 -08001042 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001043 if not self.has_power():
1044 raise error.AutoservRepairMethodNA('Device does not support power.')
1045
Richard Barnette82c35912012-11-20 10:09:10 -08001046 logging.info('Attempting repair via RPM powercycle.')
1047 failed_cycles = 0
1048 self.power_cycle()
1049 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1050 failed_cycles += 1
1051 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001052 raise error.AutoservRepairFailure(
1053 'Powercycled host %s %d times; device did not come back'
1054 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001055 self.power_cycle()
1056 if failed_cycles == 0:
1057 logging.info('Powercycling was successful first time.')
1058 else:
1059 logging.info('Powercycling was successful after %d failures.',
1060 failed_cycles)
1061
1062
MK Ryu35d661e2014-09-25 17:44:10 -07001063 def _reboot_repair(self):
1064 """SSH to this host and reboot."""
1065 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1066 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1067 logging.info('Attempting repair via SSH reboot.')
1068 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1069
1070
def check_device(self):
    """Ping a device over ssh, then clean it up and verify it.

    The three steps run in order; an exception from one of them
    skips the remaining steps and reaches the caller.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    # Reachability first: cleanup and verify both need a live host.
    self.ssh_ping()

    self.cleanup()

    self.verify()
1084
1085
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If the host has a servo host but no servo object yet, the servo
    host is repaired first (failures there are logged, not raised)
    and the servo is then fetched from the servo host.

    This escalates in order through the following procedures and verifies
    the status using `self.check_device()` after each of them. This is done
    until both the repair and the verifying step succeed.

    Escalation order of repair procedures from less intrusive to
    more intrusive repairs:
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try to re-install to a known stable
         image using auto-update.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    Crash logs are collected (best effort) before the first procedure
    and after each procedure that did not raise. For every procedure a
    stats counter named after the function is incremented with its
    outcome (SUCCEEDED, RepairNA or FAILED), plus a per-board counter
    when the board is known.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.check_device()`. If every procedure
    or its verification fails, the collected error messages are
    raised together as AutoservRepairTotalFailure.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as e:
            # A broken servo must not stop the DUT repair; the
            # servo-based procedures below check self.servo themselves.
            logging.error('Could not create a healthy servo: %s', e)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._reboot_repair,
                    self._servo_repair_power,
                    self._powercycle_to_repair,
                    self._install_repair,
                    self._install_repair_with_powerwash,
                    self._servo_repair_reinstall]
    # Message of every procedure that was not applicable or failed.
    errors = []
    board = self._get_board_from_afe()
    for repair_func in repair_funcs:
        try:
            repair_func()
            self.try_collect_crashlogs()
            # A repair only counts when the device also verifies; a
            # failure here is handled like a failed repair below.
            self.check_device()
            stats.Counter(
                    '%s.SUCCEEDED' % repair_func.__name__).increment()
            if board:
                stats.Counter(
                        '%s.%s.SUCCEEDED' % (repair_func.__name__,
                                             board)).increment()
            return
        except error.AutoservRepairMethodNA as e:
            # The procedure does not apply to this DUT; try the next.
            stats.Counter(
                    '%s.RepairNA' % repair_func.__name__).increment()
            if board:
                stats.Counter(
                        '%s.%s.RepairNA' % (repair_func.__name__,
                                            board)).increment()
            logging.warning('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            # Any other error (repair or verification): escalate.
            stats.Counter(
                    '%s.FAILED' % repair_func.__name__).increment()
            if board:
                stats.Counter(
                        '%s.%s.FAILED' % (repair_func.__name__,
                                          board)).increment()
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    # Every procedure was tried and none left a verified device.
    stats.Counter('Full_Repair_Failed').increment()
    if board:
        stats.Counter(
                'Full_Repair_Failed.%s' % board).increment()
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001185
1186
def try_collect_crashlogs(self, check_host_up=True):
    """
    Collect crash logs from the host if a crashed job is recorded for it.

    Nothing is collected when no crashed job is recorded, or when
    check_host_up is set and the host is not up. Every exception is
    logged and swallowed, so this never raises.

    @param check_host_up: Flag for checking host is up. Default is True.
    """
    try:
        job_id = self._need_crash_logs()
        if not job_id:
            return
        logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
                      job_id)

        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return

        self._collect_crashlogs(job_id)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', self._CRASHLOGS_PREFIX,
                      job_id)
    except Exception as collect_error:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, collect_error)
1210
1211
def _need_crash_logs(self):
    """Read the need_crash_logs attribute of this host from the AFE.

    At most one such attribute is expected per host.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    found = self._AFE.get_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                         hostname=self.hostname)
    assert len(found) < 2
    if found:
        return found[0].value
    return None
1222
1223
def _collect_crashlogs(self, job_id):
    """Grab logs from the host where a job was crashed.

    First, check if PRIOR_LOGS_DIR exists in the host.
    If yes, collect them.
    Otherwise, check if a lab-machine marker (_LAB_MACHINE_FILE) exists
    in the host.
    If yes, the host was repaired automatically, and we collect normal
    system logs.

    Afterwards a marker file holding the job id is written next to the
    collected logs, the need_crash_logs host attribute is cleared, and
    PRIOR_LOGS_DIR (when it was found) is removed from the host.

    @param job_id: Id of the job that was crashed.
    """
    dest_dir = crashcollect.get_crashinfo_dir(
            self, constants.CRASHLOGS_DEST_DIR_PREFIX)

    had_prior_logs = False
    if self.path_exists(client_constants.PRIOR_LOGS_DIR):
        had_prior_logs = True
        self._collect_prior_logs(dest_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(dest_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # We make crash collection be one-time effort.
    # _collect_prior_logs() and _collect_system_logs() will not throw
    # any exception, and following codes will be executed even when
    # those methods fail.
    # _collect_crashlogs() is called only when the host is up (refer
    # to try_collect_crashlogs()). We assume _collect_prior_logs() and
    # _collect_system_logs() fail rarely when the host is up.
    # In addition, it is not clear how many times we should try crash
    # collection again while not triggering next repair unnecessarily.
    # Therefore, we try crash collection one time.

    # Create a marker file as soon as log collection is done.
    # Leave the job id to this marker for gs_offloader to consume.
    marker_path = os.path.join(dest_dir, constants.CRASHLOGS_MARKER)
    with open(marker_path, 'a') as marker:
        marker.write('%s\n' % job_id)

    # Remove need_crash_logs attribute
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if not had_prior_logs:
        return

    logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                  client_constants.PRIOR_LOGS_DIR, self.hostname)
    self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
    # Wait for a few seconds to make sure the prior command is
    # done deep through storage.
    time.sleep(self._SAFE_WAIT_SECS)
1280
1281
def _collect_prior_logs(self, crashlogs_dir):
    """Fetch the logs that were stashed before the host was re-installed.

    Any exception raised while collecting is logged and swallowed, so
    this method never raises.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    prior_logs_dir = client_constants.PRIOR_LOGS_DIR
    logging.debug('%s: Found %s, collecting them...', prefix, prior_logs_dir)
    try:
        self.collect_logs(prior_logs_dir, crashlogs_dir, False)
    except Exception as e:
        # Best effort only: report the failure and carry on.
        logging.error('%s: Failed to collect %s: %s',
                      prefix, prior_logs_dir, e)
    else:
        logging.debug('%s: %s is collected', prefix, prior_logs_dir)
1298
1299
def _collect_system_logs(self, crashlogs_dir):
    """Fetch the normal system logs from the host.

    The list of paths to fetch is read from _LOGS_TO_COLLECT_FILE. Paths
    that do not exist on the host are skipped. A failure on one path is
    logged and does not stop the collection of the remaining ones.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    logging.debug('%s: Found %s, collecting system logs...',
                  prefix, self._LAB_MACHINE_FILE)
    for src in server_utils.parse_simple_config(self._LOGS_TO_COLLECT_FILE):
        try:
            if not self.path_exists(src):
                continue
            logging.debug('%s: Collecting %s...', prefix, src)
            dest = server_utils.concat_path_except_last(crashlogs_dir, src)
            self.collect_logs(src, dest, False)
            logging.debug('%s: %s is collected', prefix, src)
        except Exception as e:
            # Best effort only: report the failure and try the next path.
            logging.error('%s: Failed to collect %s: %s', prefix, src, e)
1321
1322
def close(self):
    """Disconnect all RPC proxies, then close the host.

    The parent close() runs even when tearing down an RPC connection
    raises, so that a stuck RPC server cannot prevent the host's own
    resources from being released. The original exception still
    propagates to the caller.
    """
    try:
        self.rpc_disconnect_all()
    finally:
        super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001326
1327
def get_power_supply_info(self):
    """Get the output of power_supply_info.

    power_supply_info outputs the info of each power supply, e.g.,
    Device: Line Power
      online:                  no
      type:                    Mains
      voltage (V):             0
      current (A):             0
    Device: Battery
      state:                   Discharging
      percentage:              95.9276
      technology:              Li-ion

    Above output shows two devices, Line Power and Battery, with details of
    each device listed. This function parses the output into a dictionary,
    with key being the device name, and value being a dictionary of details
    of the device info.

    Each line is split at its first colon only, so a value that itself
    contains a colon is kept intact. Lines without any colon are ignored,
    as are detail lines that appear before the first `Device:` line.

    @return: The dictionary of power_supply_info, e.g.,
             {'Line Power': {'online': 'yes', 'type': 'Mains'},
              'Battery': {'vendor': 'xyz', 'percentage': '100'}}
    @raise error.AutoservRunError if power_supply_info tool is not found in
           the DUT. Caller should handle this error to avoid false failure
           on verification.
    """
    result = self.run('power_supply_info').stdout.strip()
    info = {}
    device_name = None
    device_info = {}
    for line in result.split('\n'):
        key, separator, value = line.partition(':')
        if not separator:
            # Not a "key: value" line.
            continue
        key = key.strip()
        value = value.strip()
        if key == 'Device':
            # A new device section starts; store the one just finished.
            if device_name:
                info[device_name] = device_info
            device_name = value
            device_info = {}
        else:
            device_info[key] = value
    # Store the last section, which no following 'Device' line closed.
    if device_name and device_name not in info:
        info[device_name] = device_info
    return info
1372
1373
def get_battery_percentage(self):
    """Read the battery charge level from power_supply_info.

    @return: The battery level as a float (the tool reports a value in
             the range 0-100), or None if the battery info cannot be
             retrieved or parsed.
    """
    try:
        supply_info = self.get_power_supply_info()
        logging.info(supply_info)
        battery = supply_info['Battery']
        return float(battery['percentage'])
    except (KeyError, ValueError, error.AutoservRunError):
        # No battery section, malformed number, or the tool failed to run.
        return None
1386
1387
def is_ac_connected(self):
    """Check if the dut has power adapter connected and charging.

    @return: True if power_supply_info reports the 'Line Power' device
             as online, False if it reports otherwise, and None if the
             information cannot be retrieved.
    """
    try:
        line_power = self.get_power_supply_info()['Line Power']
        return line_power['online'] == 'yes'
    except (KeyError, error.AutoservRunError):
        # No line power section, or the tool failed to run.
        return None
Dan Shi49ca0932014-11-14 11:22:27 -08001398
1399
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the host carries the _RPM_OUTLET_CHANGED
    attribute in the AFE. Otherwise the outlet is powered on and the
    attribute is cleared.

    When powering on fails, the failure is logged and posted as
    'RPM_poweron_failure' metadata, and then:
      - if the battery level is known and below 50%, the RPM exception
        is re-raised;
      - otherwise the exception is swallowed, and if the power adapter
        is reported as connected the attribute is cleared as well.

    @raises rpm_client.RemotePowerException if powering on failed and
            the battery level is known to be below 50%.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts or not (self._RPM_OUTLET_CHANGED in
                         hosts[0].attributes):
        return
    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
        afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                               hostname=self.hostname)
    except rpm_client.RemotePowerException:
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
        es_utils.ESMetadata().post(
                type_str='RPM_poweron_failure',
                metadata={'hostname': self.hostname})

        battery_percentage = self.get_battery_percentage()
        # Compare against None explicitly: a reading of 0.0 is a valid
        # (and critically low) level, not an unknown one.
        if battery_percentage is not None and battery_percentage < 50:
            raise
        elif self.is_ac_connected():
            logging.info('The device has power adapter connected and '
                         'charging. No need to try to turn RPM on '
                         'again.')
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        logging.info('Battery level is now at %s%%. The device may '
                     'still have enough power to run test, so no '
                     'exception will be raised.', battery_percentage)
1432
Simran Basi5e6339a2013-03-21 11:34:32 -07001433
beepsc87ff602013-07-31 21:53:00 -07001434 def _is_factory_image(self):
1435 """Checks if the image on the DUT is a factory image.
1436
1437 @return: True if the image on the DUT is a factory image.
1438 False otherwise.
1439 """
1440 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1441 return result.exit_status == 0
1442
1443
def _restart_ui(self):
    """Restart the Chrome UI and wait for Chrome to be ready again.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    on_factory_image = self._is_factory_image()
    if on_factory_image:
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette): The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).

    # Capture the login prompt state before the restart; it is handed to
    # wait_for_chrome_ready() afterwards.
    prompt_before_restart = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(prompt_before_restart, self)
beepsc87ff602013-07-31 21:53:00 -07001463
1464
def cleanup(self):
    """Clean up the host after a job.

    Removes CLEANUP_LOGS_PAUSED_FILE from the DUT, then restarts the UI.
    If the UI cannot be restarted, the parent class cleanup is used
    instead, i.e. the machine is rebooted. Finally, for hosts that have
    RPM power control, power is ensured to be back on.
    """
    paused_marker = client_constants.CLEANUP_LOGS_PAUSED_FILE
    self.run('rm -f %s' % paused_marker)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Restarting the UI failed; fall back to the normal Autotest
        # cleanup routine, which reboots the machine.
        super(CrosHost, self).cleanup()

    # The RPM outlet may have been manipulated; make sure power is on.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001478
1479
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    Defaults filled in when the caller did not supply them:
      - 'reboot_cmd': a plain `reboot` followed, after 'reboot_timeout'
        seconds (10 by default), by a forced `reboot -f`.
      - 'fastsync': True, to avoid extra sync commands before reboot.
    'board' is always set to the board name with everything up to and
    including the first ':' removed.

    @param dargs: Keyword arguments passed on to the parent reboot().
    """
    if 'reboot_cmd' not in dargs:
        forced_reboot_delay = dargs.get('reboot_timeout', 10)
        reboot_template = ('((reboot & sleep %d; reboot -f &)'
                           ' </dev/null >/dev/null 2>&1 &)')
        dargs['reboot_cmd'] = reboot_template % forced_reboot_delay

    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times, pass the board name along,
    # e.g. 'daisy_spring', with its prefix stripped.
    board_fullname = self.get_board()
    dargs['board'] = board_fullname.split(':', 1)[-1]

    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001504
1505
def suspend(self, **dargs):
    """Suspend the site host.

    'timeout' is always set to 'suspend_time' (60 seconds by default).
    Unless the caller supplies 'suspend_cmd', a command is built that
    programs the RTC wake alarm for 'suspend_time' seconds from now and
    then asks powerd to suspend.

    @param dargs: Keyword arguments passed on to the parent suspend().
    """
    suspend_time = dargs.get('suspend_time', 60)
    dargs['timeout'] = suspend_time

    if 'suspend_cmd' not in dargs:
        steps = (
            # Clear any pending alarm before arming a new one.
            'echo 0 > /sys/class/rtc/rtc0/wakealarm',
            'echo +%d > /sys/class/rtc/rtc0/wakealarm' % suspend_time,
            'powerd_dbus_suspend --delay=0 &',
        )
        chained_steps = ' && '.join(steps)
        dargs['suspend_cmd'] = ('(( %s )< /dev/null >/dev/null 2>&1 &)'
                                % chained_steps)

    super(CrosHost, self).suspend(**dargs)
1519
1520
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    # grep exits with status 1 when nothing matches, i.e. when the
    # service is not running. Ignore the exit status so that this case
    # yields False instead of an exception from run().
    result = self.run('status %s | grep start/running' % service_name,
                      ignore_status=True)
    return result.stdout.strip() != ''
1530
1531
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions, in order:
      1. No crash-logs are pending collection for this host.
      2. All conditions tested by the parent version of this function.
      3. Sufficient inodes in /mnt/stateful_partition.
      4. Sufficient space in /mnt/stateful_partition.
      5. Sufficient space in /mnt/stateful_partition/encrypted, when
         that path exists.
      6. The 'system-services' upstart job is running.
      7. update_engine answers a simple status request (skipped on
         factory images).
      8. python can import cPickle.

    @raises error.AutoservCrashLogCollectRequired if crash-logs still
            need to be collected from this host.
    @raises error.AutoservError if system services are not running.
    """
    # A job crashed on this host: hold off verification until the
    # crash-logs have been collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()

    config = global_config.global_config
    stateful_path = '/mnt/stateful_partition'

    kilo_inodes_required = config.get_config_value(
            'SERVER', 'kilo_inodes_required', type=int, default=100)
    self.check_inodes(stateful_path, kilo_inodes_required)

    gb_required = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float, default=20.0)
    self.check_diskspace(stateful_path, gb_required)

    # Not all targets build with encrypted stateful support.
    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    if self.path_exists(encrypted_stateful_path):
        gb_encrypted_required = config.get_config_value(
                'SERVER', 'gb_encrypted_diskspace_required', type=float,
                default=0.1)
        self.check_diskspace(encrypted_stateful_path, gb_encrypted_required)

    services_running = self.upstart_status('system-services')
    if not services_running:
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')

    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001581
1582
def verify_hardware(self):
    """Verify hardware system of a Chrome OS system.

    Logs the following hardware conditions; nothing is enforced:
      1. Battery level.
      2. Is power adapter connected.
    """
    logging.info('Battery percentage: %s', self.get_battery_percentage())
    # Query the adapter state once: each query runs a command on the DUT,
    # and two separate queries could disagree with each other.
    ac_connected = self.is_ac_connected()
    if ac_connected is None:
        logging.info('Can not determine if the device has power adapter '
                     'connected.')
    else:
        logging.info('Device %s power adapter connected and charging.',
                     'has' if ac_connected else 'does not have')
Dan Shi49ca0932014-11-14 11:22:27 -08001597
1598
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line, using options tuned for Chrome OS.

    This overrides the default make_ssh_command. Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
        180 seconds.  In conjunction with ServerAliveCountMax ensures
        that if the connection dies, Autotest will bail out quickly.
        Originally tried 60 secs, but saw frequent job ABORTS where
        the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
        Host keys change with every new installation, don't waste
        memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command as a string, without the target host name.
    """
    fixed_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    executable_and_flags = '/usr/bin/ssh -a -x %s %s %s' % (
            self._ssh_verbosity_flag, self._ssh_options, opts)
    option_args = ''.join(' -o %s' % option for option in fixed_options)
    login_args = ' -l %s -p %d' % (user, port)
    return executable_and_flags + option_args + login_args
Fang Deng96667ca2013-08-01 17:46:18 -07001639
1640
def _create_ssh_tunnel(self, port, local_port):
    """Start an ssh process that forwards local_port to port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding_opts),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)
    return tunnel_proc
1662 return tunnel_proc
1663
1664
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: The pid of the remote server process, if known.
                       It is recorded so that rpc_disconnect() can wait
                       for that process to go away.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop whatever connection is currently registered for this port.
    self.rpc_disconnect(port)

    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    self._rpc_proxy_map[port] = (command_name, tunnel, remote_pid)
    return self._RPC_PROXY_URL % forwarded_port
1700
1701
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready. If the server isn't ready by timeout_seconds,
    the connection is torn down again and the error from the retried
    call propagates to the caller.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'
    @param logfile Logfile to send output when running
        'command' argument.

    @return The XMLRPC client proxy for the server.
    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.  The trailing 'echo $!' reports the server's pid.
    launch_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    launch_result = self.run(launch_cmd)
    remote_pid = launch_result.stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        retry_timeout_min = timeout_seconds / 60.0
        retry_delay_sec = min(max(timeout_seconds / 20.0, 0.1), 1)
        retriable_errors = (socket.error,
                            xmlrpclib.ProtocolError,
                            httplib.BadStatusLine)

        @retry.retry(retriable_errors,
                     timeout_min=retry_timeout_min,
                     delay_sec=retry_delay_sec)
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            # Tear the connection down again on any failure; the
            # original exception keeps propagating.
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001786
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001787
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    Both arguments are passed to `logger` as single-quoted shell words,
    so quotes, `$`, backticks and other shell metacharacters in them are
    logged literally instead of being interpreted by the remote shell.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    def _single_quote(text):
        """Return text as one single-quoted shell word."""
        # Inside single quotes only the quote itself is special: close
        # the quotes, emit an escaped quote, and open them again.
        return "'%s'" % ('%s' % (text,)).replace("'", "'\\''")

    self.run('logger -t %s %s' % (_single_quote(tag),
                                  _single_quote(message)))
1796
1797
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a remote
    jsonrpc server, as the only clients of this code are factory tests,
    for which the goofy system manager is built in to the image and starts
    when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None when jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name is registered: there is no remote server for
    # rpc_disconnect() to kill, only the tunnel to close.
    rpc_url = self._setup_rpc(port, None)
    proxy = jsonrpclib.jsonrpc.ServerProxy(rpc_url)

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1828
1829
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    The local tunnel is terminated even when shutting down the remote
    server fails. In that case the port stays registered, so that a
    later call can try to shut the server down again.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    @raises error.TestError if the remote server is still running
            after _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    if port not in self._rpc_proxy_map:
        return
    remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
    try:
        if remote_name:
            # We use 'pkill' to find our target process rather than
            # a PID, because the host may have rebooted since
            # connecting, and we don't want to kill an innocent
            # process with the same PID.
            #
            # 'pkill' helpfully exits with status 1 if no target
            # process is found, for which run() will throw an
            # exception.  We don't want that, so we ignore the
            # status.
            self.run("pkill -f '%s'" % remote_name, ignore_status=True)
            if remote_pid:
                logging.info('Waiting for RPC server "%s" shutdown',
                             remote_name)
                start_time = time.time()
                while (time.time() - start_time <
                       self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                    running_processes = self.run(
                            "pgrep -f '%s'" % remote_name,
                            ignore_status=True).stdout.split()
                    if remote_pid not in running_processes:
                        logging.info('Shut down RPC server.')
                        break
                    time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
                else:
                    # The loop ran out of time without seeing the
                    # server's pid disappear.
                    raise error.TestError('Failed to shutdown RPC server %s' %
                                          remote_name)
    finally:
        # Always release the local tunnel process, so that a failure
        # above does not leak it.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001884
1885
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() removes entries from the map, so iterate over a
    # snapshot of the ports rather than over the map itself.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001890
1891
def poor_mans_rpc(self, fun):
    """
    Builds a shell script that calls a function from client utils.

    The script changes into the bin directory of the autotest
    installation on the host and has python print the result of
    `utils.<fun>`. This method only builds the script; it does not run
    it.

    @param fun function call in client utils namespace, as source text.
    @return the shell script, as a string.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    script_parts = (
        'cd %s/bin; ' % autodir,
        'python -c "import common; import utils;',
        'print utils.%s"' % fun,
    )
    return ''.join(script_parts)
1903
1904
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_exit_code = utils.ping(self.hostname, tries=1, deadline=1)
    # A zero exit code from ping means the host answered.
    host_answered = ping_exit_code == 0
    mismatch = status ^ host_answered
    return not mismatch
1915
def _ping_wait_for_status(self, status, timeout):
    """Poll the host with `ping` until it reaches a given status.

    Polling stops as soon as the ping status matches `status`, or
    once `timeout` seconds have elapsed.  The delay between polls is
    chosen so that roughly _PING_WAIT_COUNT polls fit in `timeout`,
    and is capped at about one minute.

    @param status The ping status (UP or DOWN) to wait for.
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    # Each _ping_check_status() call itself takes about 1 second,
    # so that second is subtracted from the sleep between polls.
    per_cycle = int(timeout / self._PING_WAIT_COUNT)
    poll_interval = min(per_cycle, 60) - 1
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if self._ping_check_status(status):
            return True
        if poll_interval > 0:
            time.sleep(poll_interval)

    # The loop may have ended right after a sleep, so the previous
    # ping could be stale.  Probe once more before giving the answer.
    return self._ping_check_status(status)
1946
def ping_wait_up(self, timeout):
    """Block until the host answers `ping`, or the timeout expires.

    N.B. A host that answers ping does not necessarily answer ssh,
    so this is not a reliable replacement for `wait_up()`.  Use it
    only when the DUT can be considered functional even though it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001962
def ping_wait_down(self, timeout):
    """Block until the host stops answering `ping`, or the timeout expires.

    This can serve as a slightly faster alternative to
    `wait_down()`, since it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001976
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait until the client has entered low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, which
    looks the same as a host that is powered off.  To confirm the
    unit really was asleep, force a resume and then call
    `test_wait_for_resume()`.

    A test is expected to use this function in a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host sleep;
                         `self.SLEEP_TIMEOUT` is used when None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    limit = self.SLEEP_TIMEOUT if sleep_timeout is None else sleep_timeout
    went_down = self.ping_wait_down(timeout=limit)
    if went_down:
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' % limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002007
2008
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait until the client has resumed from low-power sleep mode.

    `old_boot_id` should be the value of `get_boot_id()` taken
    before the host went to sleep.  If the boot id is different
    after the host comes back, the host rebooted instead of
    resuming, and `TestFail` is raised.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                           target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host up;
                          `self.RESUME_TIMEOUT` is used when None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    limit = (self.RESUME_TIMEOUT if resume_timeout is None
             else resume_timeout)

    if not self.wait_up(timeout=limit):
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
            limit)

    # The host is back; make sure it is still the same boot session.
    new_boot_id = self.get_boot_id()
    if new_boot_id != old_boot_id:
        logging.error('client rebooted (old boot %s, new boot %s)',
                      old_boot_id, new_boot_id)
        raise error.TestFail(
            'client rebooted, but sleep was expected')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002043
2044
def test_wait_for_shutdown(self, shutdown_timeout=None):
    """Wait until the client has shut down.

    "Shut down" is detected by the host no longer answering ping,
    which looks the same as a host that is merely asleep.  To
    confirm the unit really was down, force a boot and then call
    test_wait_for_boot().

    A test is expected to use this function in a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @param shutdown_timeout time limit in seconds to allow the host down;
                            `self.SHUTDOWN_TIMEOUT` is used when None.
    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    limit = (self.SHUTDOWN_TIMEOUT if shutdown_timeout is None
             else shutdown_timeout)
    went_down = self.ping_wait_down(timeout=limit)
    if went_down:
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' %
        limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002074
2075
def test_wait_for_boot(self, old_boot_id=None):
    """Wait until the client has booted from cold power.

    `old_boot_id` should be the value of `get_boot_id()` taken
    before shutting down.  If the boot id is unchanged once the host
    is back, `TestFail` is raised.  When `old_boot_id` is not
    given, the boot id comparison is skipped.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                           shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
            'client failed to reboot after %d seconds' %
            self.REBOOT_TIMEOUT)

    # No reference boot id means there is nothing to compare against.
    if not old_boot_id:
        return

    if self.get_boot_id() == old_boot_id:
        logging.error('client not rebooted (boot %s)',
                      old_boot_id)
        raise error.TestFail(
            'client is back up, but did not reboot')
Simran Basid5e5e272012-09-24 15:23:59 -07002106
2107
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname matches the RPM-powered DUT naming format.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not match
            `CrosHost._RPM_HOSTNAME_REGEX`; otherwise the regular
            expression MatchObject produced by the match.
    """
    rpm_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07002119
2120
def has_power(self):
    """Report whether this host's name marks it as powered by an RPM.

    @return the result of `check_for_rpm_support()` for this host's
            hostname: a truthy value if the hostname follows the defined
            naming format, None otherwise.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07002128
2129
def _set_power(self, state, power_method):
    """Switch the power to the host on or off via RPM, Servo or manual.

    @param state Power state to put the DUT into; 'ON' or 'OFF',
                 compared case-insensitively.
    @param power_method Specifies which method of power control to
                        use. Any value other than the servo and manual
                        methods is handled as RPM.

    @exception TestError `state` is not one of the accepted states.
    @exception TestFail The RPM method was requested, but `has_power()`
                        reports no RPM for this host.

    """
    valid_states = ['ON', 'OFF']
    if state.upper() not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; just give the operator time to act.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Everything else goes through the RPM.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002160
2161
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Cut power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with the 'OFF' state.

    @param power_method Specifies which method of power control to
                        use. Defaults to POWER_CONTROL_RPM.

    """
    requested_state = 'OFF'
    self._set_power(requested_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002171
2172
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Restore power to this host via RPM, Servo or manual.

    Delegates to `_set_power()` with the 'ON' state.

    @param power_method Specifies which method of power control to
                        use. Defaults to POWER_CONTROL_RPM.

    """
    requested_state = 'ON'
    self._set_power(requested_state, power_method)
2182
2183
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Power-cycle this host.

    For the servo and manual methods the host is turned OFF, then,
    after `_POWER_CYCLE_TIMEOUT` seconds, turned back ON.  For any
    other method a single 'CYCLE' request is sent to the RPM client.

    @param power_method Specifies which method of power control to
                        use. Defaults to POWER_CONTROL_RPM.

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002199
2200
def get_platform(self):
    """Work out the platform label for this host from its firmware id.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally looks like {platform}.{firmware version}, e.g.
    # Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the part before the first
    # '.' is the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.')[0]
    # Newer platforms carry a 'Google_' prefix that older ones lack.
    return platform_id.lower().replace('google_', '')
2214
2215
def get_architecture(self):
    """Work out the architecture label for this host.

    @returns the value of `arch()` from this host's crossystem data.
    """
    system_info = utils.Crossystem(self)
    system_info.init()
    arch = system_info.arch()
    return arch
2224
2225
def get_chrome_version(self):
    """Fetch the Chrome version number and milestone as strings.

    Runs `client_constants.CHROME_VERSION_COMMAND` on the host and
    passes its output to `utils.parse_chrome_version()`.

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
        current Chrome version number as a string (in the form "W.X.Y.Z")
        and "milestone" is the first component of the version number
        (the "W" from "W.X.Y.Z").  If the version number cannot be parsed
        in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
        of "chrome --version" and the milestone will be the empty string.

    """
    result = self.run(client_constants.CHROME_VERSION_COMMAND)
    return utils.parse_chrome_version(result.stdout)
2241
@label_decorator()
def get_board(self):
    """Work out the board label for this host from /etc/lsb-release.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, but development
    # devices report {board_name}-signed-{key_type}.  Names containing
    # 'x86' keep their first two dash-separated parts; all other names
    # keep only the first.
    name_parts = board.split('-')
    parts_to_keep = 2 if 'x86' in board else 1
    return label_format % '-'.join(name_parts[:parts_to_keep])
Simran Basic6f1f7a2012-10-16 10:47:46 -07002258
2259
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    Runs the client utils function of the same name on the host.

    @returns a more detailed string representing the board, prefixed
             with 'board_freq_mem:'. Example board strings are
             butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    rpc_cmd = self.poor_mans_rpc('get_board_with_frequency_and_memory()')
    board = self.run(rpc_cmd).stdout
    return 'board_freq_mem:%s' % str.strip(board)
2271
2272
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Detect whether this host has a lightsensor.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_regex = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        search_dir, file_regex)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1: none of the possible
        # lightsensor files existed.
        return None
    return 'lightsensor'
2292
2293
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Detect whether this host has bluetooth.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
2309
2310
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    Runs the client utils function `get_gpu_family()` on the host.

    @returns a string representing the gpu family, prefixed with
             'gpu_family:'. Example families are mali, tegra,
             pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    rpc_cmd = self.poor_mans_rpc('get_gpu_family()')
    gpu_family = self.run(rpc_cmd).stdout
    return 'gpu_family:%s' % str.strip(gpu_family)
2321
2322
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2338
2339
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) don't return any strings, since no tests depend on
    those.
    """
    cmd = 'mosys ec info'
    # For a Chrome OS EC the last field of the output follows our EC
    # version scheme, as in:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs produce output such as:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # Some systems have no EC at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    res = re.search(regexp, ecinfo.stdout)
    if not res:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", res.groups()[0])
    return 'ec:cros'
2377
2378
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or None, if no accelerometers.
    """
    # ectool has to be available on the host
    which_result = self.run('which ectool', ignore_status=True)
    if which_result.exit_status:
        logging.info("No ectool cmd found, assuming no EC accelerometers")
        return None

    # The EC has to support the motionsense command
    probe_result = self.run('ectool motionsense', ignore_status=True)
    if probe_result.exit_status:
        logging.info("EC does not support motionsense command "
                     "assuming no EC accelerometers")
        return None

    # The EC motion sensors have to be active; the first output line
    # is "0" when they are not.
    active_lines = self.run('ectool motionsense active').stdout.split('\n')
    if active_lines[0] == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2409
2410
@label_decorator('chameleon')
def has_chameleon(self):
    """Detect whether a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
             has a Chameleon or None if it has not.
    """
    if not self._chameleon_host:
        return None
    port_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', port_label]
2423
2424
@label_decorator('audio_loopback_dongle')
def has_loopback_dongle(self):
    """Detect whether an audio loopback dongle is plugged into this host.

    The dongle is considered present when the output of
    `cras_test_client --dump_s` shows both a HEADPHONE node and a MIC
    node as plugged.

    @returns 'audio_loopback_dongle' when there is an audio loopback dongle
                                     plugged to this host.
             None                    when there is no audio loopback dongle
                                     plugged to this host.
    """
    server_info = self.run(command='cras_test_client --dump_s',
                           ignore_status=True).stdout
    if not cras_utils.node_type_is_plugged('HEADPHONE', server_info):
        return None
    if not cras_utils.node_type_is_plugged('MIC', server_info):
        return None
    return 'audio_loopback_dongle'
2441
2442
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when mosys reports the type as 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # Not every platform ships the psu command for mosys. When it
        # fails, assume the device has a battery.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2467
2468
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    Every command is run with ignore_status=True; a non-zero exit status
    from any of them is logged and results in None.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = '/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or something
        # else. We can assume that MMC is always an internal device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Named type_result so the builtin `type` is not shadowed.
        type_result = self.run(command=type_cmd, ignore_status=True)
        if type_result.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = type_result.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = '/dev/sd[a-z]+'
    # Match against the stripped device path, the same value the mmc check
    # uses, so surrounding whitespace in stdout cannot defeat the match.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        # Any value other than '0' or '1' yields None.
        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2541
2542
@label_decorator('servo')
def get_servo(self):
    """Report whether the host has a servo attached.

    The host gets a servo label when `self._servo_host` is set.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2553
2554
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect, or an
             empty list if running a command on the host raised
             AutoservRunError.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
        # Raw string, so that \w reaches the regex engine as written rather
        # than relying on Python passing an unknown escape through.
        return re.findall(r'^Detected label: (\w+)$', result, re.M)
    except error.AutoservRunError:
        # The tool is not installed.
        return []
2576
2577
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """ Determine if a board under test is supported for video glitch
    detection tests.

    A board is supported when its name (the board label without the
    board prefix) is a section of client/cros/video/device_spec.conf.

    @return: 'video_glitch_detection' if board is supported, None otherwise.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
        common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # something went wrong while parsing the conf file.
        # ConfigParser.Error is the base class of the module's exceptions;
        # the module has no lowercase `error` attribute.
        return None

    supported_boards = parser.sections()
    return 'video_glitch_detection' if dut in supported_boards else None
2600
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    A device type is reported when `inputcontrol --names -t <type>`
    prints any output for it.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
        depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device for device in ('touchpad', 'touchscreen')
            if self.run(input_cmd % device).stdout]
2615
2616
mussa584b4462014-06-20 15:13:28 -07002617
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions.

    A label function may return a single string or a list of strings;
    falsy results (None, '', []) contribute nothing.  A label function
    that raises is logged and skipped, so one broken detector does not
    prevent the remaining labels from being collected.

    @return a flat list of all label strings that were detected.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            label = None
        if not label:
            continue
        # isinstance() rather than an exact type comparison, so that
        # str and list subclasses are accepted instead of being dropped.
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, list):
            labels.extend(label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002639
2640
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    Looks up the root device with `rootdev -s -d` and reads that
    device's `removable` flag from /sys/block.

    @return: True if the root device's removable flag is 1.
    """
    device = self.run('rootdev -s -d').stdout.strip()
    flag_cmd = 'cat /sys/block/%s/removable' % os.path.basename(device)
    removable_flag = int(self.run(flag_cmd).stdout.strip())
    return removable_flag == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002650
2651
def read_from_meminfo(self, key):
    """Return a value read from /proc/meminfo on the host.

    Greps /proc/meminfo for `key` and extracts the first run of digits
    from the output.

    @param key: meminfo requested

    @return the memory value as an int

    """
    grep_cmd = 'grep %s /proc/meminfo' % key
    meminfo = self.run(grep_cmd).stdout.strip()
    logging.debug('%s', meminfo)
    first_number = re.search(r'\d+', meminfo)
    return int(first_number.group(0))