blob: 7f3dfce2153ab1326eb1c424c615055c64ee4c32 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Dan Shi7dca56e2014-11-11 17:07:56 -080024from autotest_lib.client.common_lib.cros.graphite import es_utils
Michael Liangda8c60a2014-06-03 13:24:51 -070025from autotest_lib.client.common_lib.cros.graphite import stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
Cheng-Yi Chiangf4104ff2014-12-23 19:39:01 +080028from autotest_lib.client.cros.audio import cras_utils
MK Ryu35d661e2014-09-25 17:44:10 -070029from autotest_lib.server import autoserv_parser
30from autotest_lib.server import autotest
31from autotest_lib.server import constants
32from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070033from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070034from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050035from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070036from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070037from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070038from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080039from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070040from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080041from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070042
43
beeps32a63082013-08-22 14:02:29 -070044try:
45 import jsonrpclib
46except ImportError:
47 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070048
Fang Dengd1c2b732013-08-20 12:59:46 -070049
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image under inspection is a factory image."""
53
54
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a label-detecting function.

    The decorator appends the decorated function to `label_function_list`
    and, when both a non-empty `label` and a `label_list` were supplied,
    appends `label` to `label_list`.  The decorated function itself is
    returned unchanged.

    @param label_function_list: List of label detecting functions to add
                                decorated function to.
    @param label_list: List of detectable labels to add detectable labels to.
                       (Default: None)
    @param label: Label string that is detectable by this detection function
                  (Default: None)
    """
    def register(func):
        """
        @param func: The function to be added as a detector.
        """
        label_function_list.append(func)
        # Nothing to record when no label or no label list was supplied.
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func

    return register
73
74
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    # Class-level (shared) handles: the autoserv command line parser and
    # an AFE RPC client created with a 5 minute timeout / 10 second delay.
    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.
    # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
    #   includes powerwash.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    SHUTDOWN_TIMEOUT = 10
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150
    INSTALL_TIMEOUT = 480
    POWERWASH_BOOT_TIMEOUT = 60

    # REBOOT_TIMEOUT: How long to wait for a reboot.
    #
    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # URL template for an RPC proxy on localhost; %d is the port number.
    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    # Set shutdown timeout to account for the time for restarting the UI.
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT

    # The comma-separated CROS/rpm_recovery_boards config value, split
    # into a list of strings.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched on the DUT after an update (see
    # _post_update_processing()).
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # Matches host names of the form
    # chromeos<N>[-row<N>]-rack<N><letters>-host<N>.
    _RPM_HOSTNAME_REGEX = ('chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
                           '-host(\d+)')
    # Candidate light sensor file names, together with the directory
    # named by _LIGHTSENSOR_SEARCH_DIR below.
    _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
                            "in_illuminance_input",
                            "in_illuminance0_raw",
                            "in_illuminance_raw",
                            "illuminance0_input"]
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # label_decorator is add_label_detector() pre-bound to the two lists
    # below, so decorated functions are collected in _LABEL_FUNCTIONS and
    # their labels (when given) in _DETECTABLE_LABELS.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    _RPM_OUTLET_CHANGED = 'outlet_changed'

    # URL pattern to download firmware image.
    _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
            'CROS', 'firmware_url_pattern', type=str)

    # File that has a list of directories to be collected
    _LOGS_TO_COLLECT_FILE = os.path.join(
            common.client_dir, 'common_lib', 'logs_to_collect')

    # Prefix of logging message w.r.t. crash collection
    _CRASHLOGS_PREFIX = 'collect_crashlogs'

    # Time duration waiting for host up/down check
    _CHECK_HOST_UP_TIMEOUT_SECS = 15

    # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
    # might not be completely done deep through the hardware when the machine
    # is powered down right after the command returns.
    # We should wait for a few seconds to make them done. Fingers crossed.
    _SAFE_WAIT_SECS = 10
193
194
J. Richard Barnette964fba02012-10-24 17:34:29 -0700195 @staticmethod
beeps46dadc92013-11-07 14:07:10 -0800196 def check_host(host, timeout=10):
197 """
198 Check if the given host is a chrome-os host.
199
200 @param host: An ssh host representing a device.
201 @param timeout: The timeout for the run command.
202
203 @return: True if the host device is chromeos.
204
beeps46dadc92013-11-07 14:07:10 -0800205 """
206 try:
Christopher Wiley1ea80942014-02-26 16:45:08 -0800207 result = host.run('grep -q CHROMEOS /etc/lsb-release && '
Simran Basie5f7ae42014-06-26 15:44:06 -0700208 '! which adb >/dev/null 2>&1 && '
209 '! grep -q moblab /etc/lsb-release',
Christopher Wileyfc3eac02013-11-21 16:24:57 -0800210 ignore_status=True, timeout=timeout)
beeps46dadc92013-11-07 14:07:10 -0800211 except (error.AutoservRunError, error.AutoservSSHTimeout):
212 return False
213 return result.exit_status == 0
214
215
216 @staticmethod
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800217 def _extract_arguments(args_dict, key_subset):
218 """Extract options from `args_dict` and return a subset result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800219
220 Take the provided dictionary of argument options and return
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800221 a subset that represent standard arguments needed to construct
222 a test-assistant object (chameleon or servo) for a host. The
223 intent is to provide standard argument processing from
224 run_remote_tests for tests that require a test-assistant board
225 to operate.
226
227 @param args_dict Dictionary from which to extract the arguments.
228 @param key_subset Tuple of keys to extract from the args_dict, e.g.
229 ('servo_host', 'servo_port').
230 """
231 result = {}
232 for arg in key_subset:
233 if arg in args_dict:
234 result[arg] = args_dict[arg]
235 return result
236
237
238 @staticmethod
239 def get_chameleon_arguments(args_dict):
240 """Extract chameleon options from `args_dict` and return the result.
241
242 Recommended usage:
243 ~~~~~~~~
244 args_dict = utils.args_to_dict(args)
245 chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
246 host = hosts.create_host(machine, chameleon_args=chameleon_args)
247 ~~~~~~~~
248
249 @param args_dict Dictionary from which to extract the chameleon
250 arguments.
251 """
252 return CrosHost._extract_arguments(
253 args_dict, ('chameleon_host', 'chameleon_port'))
254
255
256 @staticmethod
257 def get_servo_arguments(args_dict):
258 """Extract servo options from `args_dict` and return the result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800259
260 Recommended usage:
261 ~~~~~~~~
262 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700263 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800264 host = hosts.create_host(machine, servo_args=servo_args)
265 ~~~~~~~~
266
267 @param args_dict Dictionary from which to extract the servo
268 arguments.
269 """
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800270 return CrosHost._extract_arguments(
271 args_dict, ('servo_host', 'servo_port'))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700272
J. Richard Barnette964fba02012-10-24 17:34:29 -0700273
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800274 def _initialize(self, hostname, chameleon_args=None, servo_args=None,
275 ssh_verbosity_flag='', ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700276 *args, **dargs):
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800277 """Initialize superclasses, |self.chameleon|, and |self.servo|.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700278
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800279 This method checks whether a chameleon/servo (aka
280 test-assistant objects) is required by checking whether
281 chameleon_args/servo_args is None. This method will only
282 attempt to create the test-assistant object when it is
283 required by the test.
Fang Deng5d518f42013-08-02 14:04:32 -0700284
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800285 For creating the test-assistant object, there are three
286 possibilities: First, if the host is a lab system known to have
287 a test-assistant board, we connect to that board unconditionally.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700288 Second, if we're called from a control file that requires
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800289 test-assistant features for testing, it will pass settings from
290 the arguments, like `servo_host`, `servo_port`. If neither of
291 these cases apply, the test-assistant object will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700292
293 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700294 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700295 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700296 # self.env is a dictionary of environment variable settings
297 # to be exported for commands run on the host.
298 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
299 # errors that might happen.
300 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700301 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700302 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700303 self._ssh_options = ssh_options
Fang Deng5d518f42013-08-02 14:04:32 -0700304 # TODO(fdeng): We need to simplify the
305 # process of servo and servo_host initialization.
306 # crbug.com/298432
Dan Shi4d478522014-02-14 13:46:32 -0800307 self._servo_host = servo_host.create_servo_host(dut=self.hostname,
308 servo_args=servo_args)
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800309 # TODO(waihong): Do the simplication on Chameleon too.
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800310 self._chameleon_host = chameleon_host.create_chameleon_host(
311 dut=self.hostname, chameleon_args=chameleon_args)
312
Dan Shi4d478522014-02-14 13:46:32 -0800313 if self._servo_host is not None:
314 self.servo = self._servo_host.get_servo()
315 else:
316 self.servo = None
317
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800318 if self._chameleon_host:
Tom Wai-Hong Tameaee3402014-01-22 08:52:10 +0800319 self.chameleon = self._chameleon_host.create_chameleon_board()
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800320 else:
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800321 self.chameleon = None
Fang Deng5d518f42013-08-02 14:04:32 -0700322
323
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500324 def get_repair_image_name(self):
325 """Generate a image_name from variables in the global config.
326
327 @returns a str of $board-version/$BUILD.
328
329 """
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500330 board = self._get_board_from_afe()
331 if board is None:
332 raise error.AutoservError('DUT has no board attribute, '
333 'cannot be repaired.')
Dan Shi6964fa52014-12-18 11:04:27 -0800334 stable_version = self._AFE.run('get_stable_version', board=board)
335 build_pattern = global_config.global_config.get_config_value(
336 'CROS', 'stable_build_pattern')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500337 return build_pattern % (board, stable_version)
338
339
Scott Zawalski62bacae2013-03-05 10:40:32 -0500340 def _host_in_AFE(self):
341 """Check if the host is an object the AFE knows.
342
343 @returns the host object.
344 """
345 return self._AFE.get_hosts(hostname=self.hostname)
346
347
Chris Sosab76e0ee2013-05-22 16:55:41 -0700348 def lookup_job_repo_url(self):
349 """Looks up the job_repo_url for the host.
350
351 @returns job_repo_url from AFE or None if not found.
352
353 @raises KeyError if the host does not have a job_repo_url
354 """
355 if not self._host_in_AFE():
356 return None
357
358 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700359 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
360 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700361
362
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500363 def clear_cros_version_labels_and_job_repo_url(self):
364 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500365 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400366 return
367
Scott Zawalski62bacae2013-03-05 10:40:32 -0500368 host_list = [self.hostname]
369 labels = self._AFE.get_labels(
370 name__startswith=ds_constants.VERSION_PREFIX,
371 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800372
Scott Zawalski62bacae2013-03-05 10:40:32 -0500373 for label in labels:
374 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500375
beepscb6f1e22013-06-28 19:14:10 -0700376 self.update_job_repo_url(None, None)
377
378
379 def update_job_repo_url(self, devserver_url, image_name):
380 """
381 Updates the job_repo_url host attribute and asserts it's value.
382
383 @param devserver_url: The devserver to use in the job_repo_url.
384 @param image_name: The name of the image to use in the job_repo_url.
385
386 @raises AutoservError: If we failed to update the job_repo_url.
387 """
388 repo_url = None
389 if devserver_url and image_name:
390 repo_url = tools.get_package_url(devserver_url, image_name)
391 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500392 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700393 if self.lookup_job_repo_url() != repo_url:
394 raise error.AutoservError('Failed to update job_repo_url with %s, '
395 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500396
397
Dan Shie9309262013-06-19 22:50:21 -0700398 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400399 """Add cros_version labels and host attribute job_repo_url.
400
401 @param image_name: The name of the image e.g.
402 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700403
Scott Zawalskieadbf702013-03-14 09:23:06 -0400404 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500405 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400406 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500407
Scott Zawalskieadbf702013-03-14 09:23:06 -0400408 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700409 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500410
411 labels = self._AFE.get_labels(name=cros_label)
412 if labels:
413 label = labels[0]
414 else:
415 label = self._AFE.create_label(name=cros_label)
416
417 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700418 self.update_job_repo_url(devserver_url, image_name)
419
420
beepsdae65fd2013-07-26 16:24:41 -0700421 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700422 """
423 Make sure job_repo_url of this host is valid.
424
joychen03eaad92013-06-26 09:55:21 -0700425 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700426 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
427 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
428 download and extract it. If the devserver embedded in the url is
429 unresponsive, update the job_repo_url of the host after staging it on
430 another devserver.
431
432 @param job_repo_url: A url pointing to the devserver where the autotest
433 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700434 @param tag: The tag from the server job, in the format
435 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700436
437 @raises DevServerException: If we could not resolve a devserver.
438 @raises AutoservError: If we're unable to save the new job_repo_url as
439 a result of choosing a new devserver because the old one failed to
440 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700441 @raises urllib2.URLError: If the devserver embedded in job_repo_url
442 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700443 """
444 job_repo_url = self.lookup_job_repo_url()
445 if not job_repo_url:
446 logging.warning('No job repo url set on host %s', self.hostname)
447 return
448
449 logging.info('Verifying job repo url %s', job_repo_url)
450 devserver_url, image_name = tools.get_devserver_build_from_package_url(
451 job_repo_url)
452
beeps0c865032013-07-30 11:37:06 -0700453 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700454
455 logging.info('Staging autotest artifacts for %s on devserver %s',
456 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700457
458 start_time = time.time()
Simran Basi25e7a922014-10-31 11:56:10 -0700459 ds.stage_artifacts(image_name, ['autotest_packages'])
beeps687243d2013-07-18 15:29:27 -0700460 stage_time = time.time() - start_time
461
462 # Record how much of the verification time comes from a devserver
463 # restage. If we're doing things right we should not see multiple
464 # devservers for a given board/build/branch path.
465 try:
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800466 board, build_type, branch = server_utils.ParseBuildName(
beeps687243d2013-07-18 15:29:27 -0700467 image_name)[:3]
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800468 except server_utils.ParseBuildNameException:
beeps687243d2013-07-18 15:29:27 -0700469 pass
470 else:
beeps0c865032013-07-30 11:37:06 -0700471 devserver = devserver_url[
Chris Sosa65425082013-10-16 13:26:22 -0700472 devserver_url.find('/') + 2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700473 stats_key = {
474 'board': board,
475 'build_type': build_type,
476 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700477 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700478 }
479 stats.Gauge('verify_job_repo_url').send(
480 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
481 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700482
Scott Zawalskieadbf702013-03-14 09:23:06 -0400483
Dan Shi0f466e82013-02-22 15:44:58 -0800484 def _try_stateful_update(self, update_url, force_update, updater):
485 """Try to use stateful update to initialize DUT.
486
487 When DUT is already running the same version that machine_install
488 tries to install, stateful update is a much faster way to clean up
489 the DUT for testing, compared to a full reimage. It is implemeted
490 by calling autoupdater.run_update, but skipping updating root, as
491 updating the kernel is time consuming and not necessary.
492
493 @param update_url: url of the image.
494 @param force_update: Set to True to update the image even if the DUT
495 is running the same version.
496 @param updater: ChromiumOSUpdater instance used to update the DUT.
497 @returns: True if the DUT was updated with stateful update.
498
499 """
J. Richard Barnette3f731032014-04-07 17:42:59 -0700500 # TODO(jrbarnette): Yes, I hate this re.match() test case.
501 # It's better than the alternative: see crbug.com/360944.
502 image_name = autoupdater.url_to_image_name(update_url)
503 release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
504 if not re.match(release_pattern, image_name):
505 return False
Dan Shi0f466e82013-02-22 15:44:58 -0800506 if not updater.check_version():
507 return False
508 if not force_update:
509 logging.info('Canceling stateful update because the new and '
510 'old versions are the same.')
511 return False
512 # Following folders should be rebuilt after stateful update.
513 # A test file is used to confirm each folder gets rebuilt after
514 # the stateful update.
515 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
516 test_file = '.test_file_to_be_deleted'
517 for folder in folders_to_check:
518 touch_path = os.path.join(folder, test_file)
519 self.run('touch %s' % touch_path)
520
521 if not updater.run_update(force_update=True, update_root=False):
522 return False
523
524 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700525 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800526 check_file_cmd = 'test -f %s; echo $?'
527 for folder in folders_to_check:
528 test_file_path = os.path.join(folder, test_file)
529 result = self.run(check_file_cmd % test_file_path,
530 ignore_status=True)
531 if result.exit_status == 1:
532 return False
533 return True
534
535
J. Richard Barnette7275b612013-06-04 18:13:11 -0700536 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800537 """After the DUT is updated, confirm machine_install succeeded.
538
539 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700540 @param expected_kernel: kernel expected to be active after reboot,
541 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800542
543 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700544 # Touch the lab machine file to leave a marker that
545 # distinguishes this image from other test images.
546 # Afterwards, we must re-run the autoreboot script because
547 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800548 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800549 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700550 updater.verify_boot_expectations(
551 expected_kernel, rollback_message=
552 'Build %s failed to boot on %s; system rolled back to previous'
553 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700554 # Check that we've got the build we meant to install.
555 if not updater.check_version_to_confirm_install():
556 raise autoupdater.ChromiumOSError(
557 'Failed to update %s to build %s; found build '
558 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700559 updater.update_version,
560 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800561
562
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700563 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400564 """Stage a build on a devserver and return the update_url.
565
566 @param image_name: a name like lumpy-release/R27-3837.0.0
567 @returns an update URL like:
568 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
569 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700570 if not image_name:
571 image_name = self.get_repair_image_name()
572 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400573 devserver = dev_server.ImageServer.resolve(image_name)
574 devserver.trigger_download(image_name, synchronous=False)
575 return tools.image_url_pattern() % (devserver.url(), image_name)
576
577
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700578 def stage_image_for_servo(self, image_name=None):
579 """Stage a build on a devserver and return the update_url.
580
581 @param image_name: a name like lumpy-release/R27-3837.0.0
582 @returns an update URL like:
583 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
584 """
585 if not image_name:
586 image_name = self.get_repair_image_name()
587 logging.info('Staging build for servo install: %s', image_name)
588 devserver = dev_server.ImageServer.resolve(image_name)
589 devserver.stage_artifacts(image_name, ['test_image'])
590 return devserver.get_test_image_url(image_name)
591
592
beepse539be02013-07-31 21:57:39 -0700593 def stage_factory_image_for_servo(self, image_name):
594 """Stage a build on a devserver and return the update_url.
595
596 @param image_name: a name like <baord>/4262.204.0
beeps12c0a3c2013-09-03 11:58:27 -0700597
beepse539be02013-07-31 21:57:39 -0700598 @return: An update URL, eg:
599 http://<devserver>/static/canary-channel/\
600 <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
beeps12c0a3c2013-09-03 11:58:27 -0700601
602 @raises: ValueError if the factory artifact name is missing from
603 the config.
604
beepse539be02013-07-31 21:57:39 -0700605 """
606 if not image_name:
607 logging.error('Need an image_name to stage a factory image.')
608 return
609
beeps12c0a3c2013-09-03 11:58:27 -0700610 factory_artifact = global_config.global_config.get_config_value(
611 'CROS', 'factory_artifact', type=str, default='')
612 if not factory_artifact:
613 raise ValueError('Cannot retrieve the factory artifact name from '
614 'autotest config, and hence cannot stage factory '
615 'artifacts.')
616
beepse539be02013-07-31 21:57:39 -0700617 logging.info('Staging build for servo install: %s', image_name)
618 devserver = dev_server.ImageServer.resolve(image_name)
619 devserver.stage_artifacts(
620 image_name,
beeps12c0a3c2013-09-03 11:58:27 -0700621 [factory_artifact],
622 archive_url=None)
beepse539be02013-07-31 21:57:39 -0700623
624 return tools.factory_image_url_pattern() % (devserver.url(), image_name)
625
626
    def machine_install(self, update_url=None, force_update=False,
                        local_devserver=False, repair=False):
        """Install the DUT.

        Use stateful update if the DUT is already running the same build.
        Stateful update does not update kernel and tends to run much faster
        than a full reimage. If the DUT is running a different build, or it
        failed to do a stateful update, full update, including kernel update,
        will be applied to the DUT.

        Once a host enters machine_install its cros_version label will be
        removed as well as its host attribute job_repo_url (used for
        package install).

        When no update_url is passed in, it is resolved in this order:
          1. `self._parser.options.image`, used verbatim if it starts with
             'http://', otherwise staged via `_stage_image_for_update()`;
          2. the repair image, when `repair` is True;
          3. otherwise ChromiumOSError is raised.

        @param update_url: The url to use for the update
                pattern: http://$devserver:###/update/$build
                If update_url is None and repair is True we will install the
                stable image listed in afe_stable_versions table. If the table
                is not setup, global_config value under CROS.stable_cros_version
                will be used instead.
        @param force_update: Force an update even if the version installed
                is the same. Default:False
        @param local_devserver: Used by run_remote_test to allow people to
                use their local devserver. Default: False
        @param repair: Whether or not we are in repair mode. This adds special
                cases for repairing a machine like starting update_engine.
                Setting repair to True sets force_update to True as well.
                default: False
        @raises autoupdater.ChromiumOSError

        """
        if update_url:
            logging.debug('update url is set to %s', update_url)
        else:
            logging.debug('update url is not set, resolving...')
            if self._parser.options.image:
                requested_build = self._parser.options.image
                if requested_build.startswith('http://'):
                    update_url = requested_build
                    logging.debug('update url is retrieved from requested_build'
                                  ': %s', update_url)
                else:
                    # Try to stage any build that does not start with
                    # http:// on the devservers defined in
                    # global_config.ini.
                    update_url = self._stage_image_for_update(requested_build)
                    logging.debug('Build staged, and update_url is set to: %s',
                                  update_url)
            elif repair:
                # No image requested: fall back to the repair image.
                update_url = self._stage_image_for_update()
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
            else:
                raise autoupdater.ChromiumOSError(
                        'Update failed. No update URL provided.')

        if repair:
            # In case the system is in a bad state, we always reboot the machine
            # before machine_install.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
            self.run('stop update-engine; start update-engine')
            force_update = True

        updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                                local_devserver=local_devserver)
        updated = False
        # Remove cros-version and job_repo_url host attribute from host.
        self.clear_cros_version_labels_and_job_repo_url()
        # If the DUT is already running the same build, try stateful update
        # first. Stateful update does not update kernel and tends to run much
        # faster than a full reimage.
        try:
            updated = self._try_stateful_update(
                    update_url, force_update, updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as e:
            # Best effort only: any failure here falls through to the full
            # update below.
            logging.exception(e)
            logging.warning('Failed to stateful update DUT, force to update.')

        # Stays None when only a stateful update was applied.
        inactive_kernel = None
        # Do a full update if stateful update is not applicable or failed.
        if not updated:
            # TODO(sosa): Remove temporary hack to get rid of bricked machines
            # that can't update due to a corrupted policy.
            self.run('rm -rf /var/lib/whitelist')
            self.run('mkdir /var/lib/whitelist')
            self.run('chmod -w /var/lib/whitelist')
            self.run('stop update-engine; start update-engine')

            if updater.run_update(force_update):
                updated = True
                # Figure out active and inactive kernel.
                active_kernel, inactive_kernel = updater.get_kernel_state()

                # Ensure inactive kernel has higher priority than active.
                if (updater.get_kernel_priority(inactive_kernel)
                        < updater.get_kernel_priority(active_kernel)):
                    raise autoupdater.ChromiumOSError(
                            'Update failed. The priority of the inactive kernel'
                            ' partition is less than that of the active kernel'
                            ' partition.')

                # Updater has returned successfully; reboot the host.
                self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        if updated:
            self._post_update_processing(updater, inactive_kernel)
            # Re-apply the labels/attribute that were cleared above, now
            # pointing at the build that was just installed.
            image_name = autoupdater.url_to_image_name(update_url)
            self.add_cros_version_labels_and_job_repo_url(image_name)

        logging.debug('Cleaning up old autotest directories.')
        try:
            installed_autodir = autotest.Autotest.get_installed_autodir(self)
            self.run('rm -rf ' + installed_autodir)
        except autotest.AutodirNotFoundError:
            logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700744
745
Dan Shi9cb0eec2014-06-03 09:04:50 -0700746 def _clear_fw_version_labels(self):
747 """Clear firmware version labels from the machine."""
748 labels = self._AFE.get_labels(
749 name__startswith=provision.FW_VERSION_PREFIX,
750 host__hostname=self.hostname)
751 for label in labels:
752 label.remove_hosts(hosts=[self.hostname])
753
754
755 def _add_fw_version_label(self, build):
756 """Add firmware version label to the machine.
757
758 @param build: Build of firmware.
759
760 """
761 fw_label = provision.fw_version_to_label(build)
762 provision.ensure_label_exists(fw_label)
763 label = self._AFE.get_labels(name__startswith=fw_label)[0]
764 label.add_hosts([self.hostname])
765
766
767 def firmware_install(self, build=None):
768 """Install firmware to the DUT.
769
770 Use stateful update if the DUT is already running the same build.
771 Stateful update does not update kernel and tends to run much faster
772 than a full reimage. If the DUT is running a different build, or it
773 failed to do a stateful update, full update, including kernel update,
774 will be applied to the DUT.
775
776 Once a host enters firmware_install its fw_version label will be
777 removed. After the firmware is updated successfully, a new fw_version
778 label will be added to the host.
779
780 @param build: The build version to which we want to provision the
781 firmware of the machine,
782 e.g. 'link-firmware/R22-2695.1.144'.
783
784 TODO(dshi): After bug 381718 is fixed, update here with corresponding
785 exceptions that could be raised.
786
787 """
788 if not self.servo:
789 raise error.TestError('Host %s does not have servo.' %
790 self.hostname)
791
792 # TODO(fdeng): use host.get_board() after
793 # crbug.com/271834 is fixed.
794 board = self._get_board_from_afe()
795
796 # If build is not set, assume it's repair mode and try to install
797 # firmware from stable CrOS.
798 if not build:
799 build = self.get_repair_image_name()
800
801 config = FAFTConfig(board)
802 if config.use_u_boot:
803 ap_image = 'image-%s.bin' % board
804 else: # Depthcharge platform
805 ap_image = 'image.bin'
806 ec_image = 'ec.bin'
807 ds = dev_server.ImageServer.resolve(build)
808 ds.stage_artifacts(build, ['firmware'])
809
810 tmpd = autotemp.tempdir(unique_id='fwimage')
811 try:
812 fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
813 local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
814 server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
815 timeout=60)
816 server_utils.system('tar xf %s -C %s %s %s' %
817 (local_tarball, tmpd.name, ap_image, ec_image),
818 timeout=60)
819 server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
820 (local_tarball, tmpd.name),
821 timeout=60, ignore_status=True)
822
823 self._clear_fw_version_labels()
824 logging.info('Will re-program EC now')
825 self.servo.program_ec(os.path.join(tmpd.name, ec_image))
826 logging.info('Will re-program BIOS now')
827 self.servo.program_bios(os.path.join(tmpd.name, ap_image))
828 self.servo.get_power_state_controller().reset()
829 time.sleep(self.servo.BOOT_DELAY)
830 self._add_fw_version_label()
831 finally:
832 tmpd.clean()
833
834
Dan Shi10e992b2013-08-30 11:02:59 -0700835 def show_update_engine_log(self):
836 """Output update engine log."""
MK Ryu35d661e2014-09-25 17:44:10 -0700837 logging.debug('Dumping %s', client_constants.UPDATE_ENGINE_LOG)
838 self.run('cat %s' % client_constants.UPDATE_ENGINE_LOG)
Dan Shi10e992b2013-08-30 11:02:59 -0700839
840
Richard Barnette82c35912012-11-20 10:09:10 -0800841 def _get_board_from_afe(self):
842 """Retrieve this host's board from its labels in the AFE.
843
844 Looks for a host label of the form "board:<board>", and
845 returns the "<board>" part of the label. `None` is returned
846 if there is not a single, unique label matching the pattern.
847
848 @returns board from label, or `None`.
849 """
Dan Shia1ecd5c2013-06-06 11:21:31 -0700850 return server_utils.get_board_from_afe(self.hostname, self._AFE)
Simran Basi833814b2013-01-29 13:13:43 -0800851
852
853 def get_build(self):
854 """Retrieve the current build for this Host from the AFE.
855
856 Looks through this host's labels in the AFE to determine its build.
857
858 @returns The current build or None if it could not find it or if there
859 were multiple build labels assigned to this host.
860 """
Dan Shia1ecd5c2013-06-06 11:21:31 -0700861 return server_utils.get_build_from_afe(self.hostname, self._AFE)
Richard Barnette82c35912012-11-20 10:09:10 -0800862
863
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500864 def _install_repair(self):
865 """Attempt to repair this host using upate-engine.
866
867 If the host is up, try installing the DUT with a stable
Dan Shi6964fa52014-12-18 11:04:27 -0800868 "repair" version of Chrome OS as defined in afe_stable_versions table.
869 If the table is not setup, global_config value under
870 CROS.stable_cros_version will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500871
Scott Zawalski62bacae2013-03-05 10:40:32 -0500872 @raises AutoservRepairMethodNA if the DUT is not reachable.
873 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500874
875 """
876 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500877 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500878 logging.info('Attempting to reimage machine to repair image.')
879 try:
880 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700881 except autoupdater.ChromiumOSError as e:
882 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500883 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500884 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500885
886
Dan Shi2c88eed2013-11-12 10:18:38 -0800887 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800888 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800889
Dan Shi9cc48452013-11-12 12:39:26 -0800890 update-engine may fail due to a bad image. In such case, powerwash
891 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800892
893 @raises AutoservRepairMethodNA if the DUT is not reachable.
894 @raises ChromiumOSError if the install failed for some reason.
895
896 """
897 if not self.is_up():
898 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
899
900 logging.info('Attempting to powerwash the DUT.')
901 self.run('echo "fast safe" > '
902 '/mnt/stateful_partition/factory_install_reset')
903 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
904 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800905 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800906 'reboot.')
907 raise error.AutoservRepairFailure(
908 'DUT failed to boot from powerwash after %d seconds' %
909 self.POWERWASH_BOOT_TIMEOUT)
910
911 logging.info('Powerwash succeeded.')
912 self._install_repair()
913
914
beepsf079cfb2013-09-18 17:49:51 -0700915 def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
916 install_timeout=INSTALL_TIMEOUT):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500917 """
918 Re-install the OS on the DUT by:
919 1) installing a test image on a USB storage device attached to the Servo
920 board,
Richard Barnette03a0c132012-11-05 12:40:35 -0800921 2) booting that image in recovery mode, and then
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700922 3) installing the image with chromeos-install.
923
Scott Zawalski62bacae2013-03-05 10:40:32 -0500924 @param image_url: If specified use as the url to install on the DUT.
925 otherwise boot the currently staged image on the USB stick.
beepsf079cfb2013-09-18 17:49:51 -0700926 @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
927 Factory images need a longer usb_boot_timeout than regular
928 cros images.
929 @param install_timeout: The timeout to use when installing the chromeos
930 image. Factory images need a longer install_timeout.
Richard Barnette03a0c132012-11-05 12:40:35 -0800931
Scott Zawalski62bacae2013-03-05 10:40:32 -0500932 @raises AutoservError if the image fails to boot.
beepsf079cfb2013-09-18 17:49:51 -0700933
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800934 """
beepsf079cfb2013-09-18 17:49:51 -0700935 usb_boot_timer_key = ('servo_install.usb_boot_timeout_%s'
936 % usb_boot_timeout)
937 logging.info('Downloading image to USB, then booting from it. Usb boot '
938 'timeout = %s', usb_boot_timeout)
939 timer = stats.Timer(usb_boot_timer_key)
940 timer.start()
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700941 self.servo.install_recovery_image(image_url)
beepsf079cfb2013-09-18 17:49:51 -0700942 if not self.wait_up(timeout=usb_boot_timeout):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500943 raise error.AutoservRepairFailure(
944 'DUT failed to boot from USB after %d seconds' %
beepsf079cfb2013-09-18 17:49:51 -0700945 usb_boot_timeout)
946 timer.stop()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500947
beepsf079cfb2013-09-18 17:49:51 -0700948 install_timer_key = ('servo_install.install_timeout_%s'
949 % install_timeout)
950 timer = stats.Timer(install_timer_key)
951 timer.start()
952 logging.info('Installing image through chromeos-install.')
MK Ryu35d661e2014-09-25 17:44:10 -0700953 self.run('chromeos-install --yes --lab_preserve_logs=%s' %
954 self._LOGS_TO_COLLECT_FILE,
955 timeout=install_timeout)
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800956 self.run('halt')
beepsf079cfb2013-09-18 17:49:51 -0700957 timer.stop()
958
959 logging.info('Power cycling DUT through servo.')
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800960 self.servo.get_power_state_controller().power_off()
Fang Dengafb88142013-05-30 17:44:31 -0700961 self.servo.switch_usbkey('off')
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800962 # N.B. The Servo API requires that we use power_on() here
963 # for two reasons:
964 # 1) After turning on a DUT in recovery mode, you must turn
965 # it off and then on with power_on() once more to
966 # disable recovery mode (this is a Parrot specific
967 # requirement).
968 # 2) After power_off(), the only way to turn on is with
969 # power_on() (this is a Storm specific requirement).
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700970 self.servo.get_power_state_controller().power_on()
beepsf079cfb2013-09-18 17:49:51 -0700971
972 logging.info('Waiting for DUT to come back up.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800973 if not self.wait_up(timeout=self.BOOT_TIMEOUT):
974 raise error.AutoservError('DUT failed to reboot installed '
975 'test image after %d seconds' %
Scott Zawalski62bacae2013-03-05 10:40:32 -0500976 self.BOOT_TIMEOUT)
977
978
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700979 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500980 """Reinstall the DUT utilizing servo and a test image.
981
982 Re-install the OS on the DUT by:
983 1) installing a test image on a USB storage device attached to the Servo
984 board,
985 2) booting that image in recovery mode, and then
986 3) installing the image with chromeos-install.
987
Scott Zawalski62bacae2013-03-05 10:40:32 -0500988 @raises AutoservRepairMethodNA if the device does not have servo
989 support.
990
991 """
992 if not self.servo:
993 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
994 'DUT has no servo support.')
995
996 logging.info('Attempting to recovery servo enabled device with '
997 'servo_repair_reinstall')
998
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700999 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001000 self.servo_install(image_url)
1001
1002
1003 def _servo_repair_power(self):
1004 """Attempt to repair DUT using an attached Servo.
1005
1006 Attempt to power on the DUT via power_long_press.
1007
1008 @raises AutoservRepairMethodNA if the device does not have servo
1009 support.
1010 @raises AutoservRepairFailure if the repair fails for any reason.
1011 """
1012 if not self.servo:
1013 raise error.AutoservRepairMethodNA('Repair Power NA: '
1014 'DUT has no servo support.')
1015
1016 logging.info('Attempting to recover servo enabled device by '
1017 'powering it off and on.')
1018 self.servo.get_power_state_controller().power_off()
1019 self.servo.get_power_state_controller().power_on()
1020 if self.wait_up(self.BOOT_TIMEOUT):
1021 return
1022
1023 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001024
1025
Richard Barnette82c35912012-11-20 10:09:10 -08001026 def _powercycle_to_repair(self):
1027 """Utilize the RPM Infrastructure to bring the host back up.
1028
1029 If the host is not up/repaired after the first powercycle we utilize
1030 auto fallback to the last good install by powercycling and rebooting the
1031 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001032
1033 @raises AutoservRepairMethodNA if the device does not support remote
1034 power.
1035 @raises AutoservRepairFailure if the repair fails for any reason.
1036
Richard Barnette82c35912012-11-20 10:09:10 -08001037 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001038 if not self.has_power():
1039 raise error.AutoservRepairMethodNA('Device does not support power.')
1040
Richard Barnette82c35912012-11-20 10:09:10 -08001041 logging.info('Attempting repair via RPM powercycle.')
1042 failed_cycles = 0
1043 self.power_cycle()
1044 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1045 failed_cycles += 1
1046 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001047 raise error.AutoservRepairFailure(
1048 'Powercycled host %s %d times; device did not come back'
1049 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001050 self.power_cycle()
1051 if failed_cycles == 0:
1052 logging.info('Powercycling was successful first time.')
1053 else:
1054 logging.info('Powercycling was successful after %d failures.',
1055 failed_cycles)
1056
1057
MK Ryu35d661e2014-09-25 17:44:10 -07001058 def _reboot_repair(self):
1059 """SSH to this host and reboot."""
1060 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1061 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1062 logging.info('Attempting repair via SSH reboot.')
1063 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1064
1065
Prashanth B4d8184f2014-05-05 12:22:02 -07001066 def check_device(self):
1067 """Check if a device is ssh-able, and if so, clean and verify it.
1068
1069 @raise AutoservSSHTimeout: If the ssh ping times out.
1070 @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
1071 permissions.
1072 @raise AutoservSshPingHostError: For other AutoservRunErrors during
1073 ssh_ping.
1074 @raises AutoservError: As appropriate, during cleanup and verify.
1075 """
1076 self.ssh_ping()
1077 self.cleanup()
1078 self.verify()
1079
1080
Richard Barnette82c35912012-11-20 10:09:10 -08001081 def repair_full(self):
1082 """Repair a host for repair level NO_PROTECTION.
1083
1084 This overrides the base class function for repair; it does
1085 not call back to the parent class, but instead offers a
1086 simplified implementation based on the capabilities in the
1087 Chrome OS test lab.
1088
Fang Deng5d518f42013-08-02 14:04:32 -07001089 It first verifies and repairs servo if it is a DUT in CrOS
Fang Deng03590af2013-10-07 17:34:20 -07001090 lab and a servo is attached.
Fang Deng5d518f42013-08-02 14:04:32 -07001091
Jakob Juelich82b7d1c2014-09-15 16:10:57 -07001092 This escalates in order through the following procedures and verifies
1093 the status using `self.check_device()` after each of them. This is done
1094 until both the repair and the veryfing step succeed.
1095
MK Ryu35d661e2014-09-25 17:44:10 -07001096 Escalation order of repair procedures from less intrusive to
1097 more intrusive repairs:
1098 1. SSH to the DUT and reboot.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001099 2. If there's a servo for the DUT, try to power the DUT off and
1100 on.
MK Ryu35d661e2014-09-25 17:44:10 -07001101 3. If the DUT can be power-cycled via RPM, try to repair
Richard Barnette82c35912012-11-20 10:09:10 -08001102 by power-cycling.
MK Ryu35d661e2014-09-25 17:44:10 -07001103 4. Try to re-install to a known stable image using
1104 auto-update.
1105 5. If there's a servo for the DUT, try to re-install via
1106 the servo.
Richard Barnette82c35912012-11-20 10:09:10 -08001107
1108 As with the parent method, the last operation performed on
Prashanth B4d8184f2014-05-05 12:22:02 -07001109 the DUT must be to call `self.check_device()`; If that call fails the
1110 exception it raises is passed back to the caller.
J. Richard Barnettefde55fc2013-03-15 17:47:01 -07001111
Scott Zawalski62bacae2013-03-05 10:40:32 -05001112 @raises AutoservRepairTotalFailure if the repair process fails to
1113 fix the DUT.
Fang Deng5d518f42013-08-02 14:04:32 -07001114 @raises ServoHostRepairTotalFailure if the repair process fails to
1115 fix the servo host if one is attached to the DUT.
1116 @raises AutoservSshPermissionDeniedError if it is unable
1117 to ssh to the servo host due to permission error.
1118
Richard Barnette82c35912012-11-20 10:09:10 -08001119 """
Jakob Juelich82b7d1c2014-09-15 16:10:57 -07001120 # Caution: Deleting shards relies on repair to always reboot the DUT.
1121
Dan Shi4d478522014-02-14 13:46:32 -08001122 if self._servo_host and not self.servo:
Fang Deng03590af2013-10-07 17:34:20 -07001123 try:
Dan Shi4d478522014-02-14 13:46:32 -08001124 self._servo_host.repair_full()
Fang Deng03590af2013-10-07 17:34:20 -07001125 except Exception as e:
Fang Deng03590af2013-10-07 17:34:20 -07001126 logging.error('Could not create a healthy servo: %s', e)
Dan Shi4d478522014-02-14 13:46:32 -08001127 self.servo = self._servo_host.get_servo()
Fang Deng5d518f42013-08-02 14:04:32 -07001128
MK Ryu35d661e2014-09-25 17:44:10 -07001129 self.try_collect_crashlogs()
1130
Scott Zawalski62bacae2013-03-05 10:40:32 -05001131 # TODO(scottz): This should use something similar to label_decorator,
1132 # but needs to be populated in order so DUTs are repaired with the
1133 # least amount of effort.
MK Ryu35d661e2014-09-25 17:44:10 -07001134 repair_funcs = [self._reboot_repair,
1135 self._servo_repair_power,
1136 self._powercycle_to_repair,
Dan Shi849a1c42014-03-05 11:10:43 -08001137 self._install_repair,
Dan Shi2c88eed2013-11-12 10:18:38 -08001138 self._install_repair_with_powerwash,
MK Ryu35d661e2014-09-25 17:44:10 -07001139 self._servo_repair_reinstall]
Scott Zawalski62bacae2013-03-05 10:40:32 -05001140 errors = []
Simran Basie6130932013-10-01 14:07:52 -07001141 board = self._get_board_from_afe()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001142 for repair_func in repair_funcs:
1143 try:
1144 repair_func()
MK Ryu35d661e2014-09-25 17:44:10 -07001145 self.try_collect_crashlogs()
Prashanth B4d8184f2014-05-05 12:22:02 -07001146 self.check_device()
Simran Basie6130932013-10-01 14:07:52 -07001147 stats.Counter(
1148 '%s.SUCCEEDED' % repair_func.__name__).increment()
1149 if board:
1150 stats.Counter(
Dan Shib87c3aa2014-02-12 15:40:31 -08001151 '%s.%s.SUCCEEDED' % (repair_func.__name__,
Simran Basie6130932013-10-01 14:07:52 -07001152 board)).increment()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001153 return
Simran Basie6130932013-10-01 14:07:52 -07001154 except error.AutoservRepairMethodNA as e:
1155 stats.Counter(
1156 '%s.RepairNA' % repair_func.__name__).increment()
1157 if board:
1158 stats.Counter(
Dan Shib87c3aa2014-02-12 15:40:31 -08001159 '%s.%s.RepairNA' % (repair_func.__name__,
Simran Basie6130932013-10-01 14:07:52 -07001160 board)).increment()
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -07001161 logging.warning('Repair function NA: %s', e)
Simran Basie6130932013-10-01 14:07:52 -07001162 errors.append(str(e))
Scott Zawalski62bacae2013-03-05 10:40:32 -05001163 except Exception as e:
Simran Basie6130932013-10-01 14:07:52 -07001164 stats.Counter(
1165 '%s.FAILED' % repair_func.__name__).increment()
1166 if board:
1167 stats.Counter(
Dan Shib87c3aa2014-02-12 15:40:31 -08001168 '%s.%s.FAILED' % (repair_func.__name__,
Simran Basie6130932013-10-01 14:07:52 -07001169 board)).increment()
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -07001170 logging.warning('Failed to repair device: %s', e)
Scott Zawalski62bacae2013-03-05 10:40:32 -05001171 errors.append(str(e))
Scott Zawalski89c44dd2013-02-26 09:28:02 -05001172
Simran Basie6130932013-10-01 14:07:52 -07001173 stats.Counter('Full_Repair_Failed').increment()
1174 if board:
1175 stats.Counter(
1176 'Full_Repair_Failed.%s' % board).increment()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001177 raise error.AutoservRepairTotalFailure(
1178 'All attempts at repairing the device failed:\n%s' %
1179 '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001180
1181
MK Ryu35d661e2014-09-25 17:44:10 -07001182 def try_collect_crashlogs(self, check_host_up=True):
1183 """
1184 Check if a host is up and logs need to be collected from the host,
1185 if yes, collect them.
1186
1187 @param check_host_up: Flag for checking host is up. Default is True.
1188 """
1189 try:
1190 crash_job = self._need_crash_logs()
1191 if crash_job:
1192 logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
1193 crash_job)
1194 if not check_host_up or self.is_up(
1195 self._CHECK_HOST_UP_TIMEOUT_SECS):
1196 self._collect_crashlogs(crash_job)
1197 logging.debug('%s: Completed collecting logs for the '
1198 'crashed job %s', self._CRASHLOGS_PREFIX,
1199 crash_job)
1200 except Exception as e:
1201 # Exception should not result in repair failure.
1202 # Therefore, suppress all exceptions here.
1203 logging.error('%s: Failed while trying to collect crash-logs: %s',
1204 self._CRASHLOGS_PREFIX, e)
1205
1206
1207 def _need_crash_logs(self):
1208 """Get the value of need_crash_logs attribute of this host.
1209
1210 @return: Value string of need_crash_logs attribute
1211 None if there is no need_crash_logs attribute
1212 """
1213 attrs = self._AFE.get_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
1214 hostname=self.hostname)
1215 assert len(attrs) < 2
1216 return attrs[0].value if attrs else None
1217
1218
def _collect_crashlogs(self, job_id):
    """Collect logs from a host on which a job crashed.

    If PRIOR_LOGS_DIR exists on the host, its contents are collected.
    Otherwise, if the lab-machine marker (_LAB_MACHINE_FILE) exists,
    the host was repaired automatically and the normal system logs
    are collected. If neither exists, a warning is logged and nothing
    is collected.

    In all three cases a marker file containing the job id is then
    appended to, and the crash-logs host attribute is cleared.

    @param job_id: Id of the job that was crashed.
    """
    dest_dir = crashcollect.get_crashinfo_dir(
            self, constants.CRASHLOGS_DEST_DIR_PREFIX)

    had_prior_logs = self.path_exists(client_constants.PRIOR_LOGS_DIR)
    if had_prior_logs:
        self._collect_prior_logs(dest_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(dest_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # Crash collection is deliberately a one-time effort.
    # _collect_prior_logs() and _collect_system_logs() do not throw,
    # so the code below runs even when they fail.
    # _collect_crashlogs() is only called when the host is up (refer
    # to try_collect_crashlogs()), and we assume the collectors rarely
    # fail in that state. It is also unclear how many retries would be
    # reasonable without triggering the next repair unnecessarily.
    # Therefore, we try crash collection exactly once.

    # Drop the marker as soon as collection is done; gs_offloader
    # consumes the job id recorded in it.
    marker_path = os.path.join(dest_dir, constants.CRASHLOGS_MARKER)
    with open(marker_path, 'a') as marker:
        marker.write('%s\n' % job_id)

    # Clear the attribute that requested crash-log collection.
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if not had_prior_logs:
        return

    logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                  client_constants.PRIOR_LOGS_DIR, self.hostname)
    self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
    # Give the removal a few seconds to make it all the way down to
    # storage.
    time.sleep(self._SAFE_WAIT_SECS)
1275
1276
def _collect_prior_logs(self, crashlogs_dir):
    """Collect the logs stashed in PRIOR_LOGS_DIR on the host.

    Any exception raised while collecting is logged and swallowed, so
    this method never propagates an `Exception` to its caller.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    prior_dir = client_constants.PRIOR_LOGS_DIR
    logging.debug('%s: Found %s, collecting them...', prefix, prior_dir)
    try:
        self.collect_logs(prior_dir, crashlogs_dir, False)
        logging.debug('%s: %s is collected', prefix, prior_dir)
    except Exception as err:
        # Best effort only: report the failure and carry on.
        logging.error('%s: Failed to collect %s: %s',
                      prefix, prior_dir, err)
1293
1294
def _collect_system_logs(self, crashlogs_dir):
    """Collect the normal system logs from the host.

    The list of sources is read from _LOGS_TO_COLLECT_FILE. Each
    source that exists on the host is collected independently; a
    failure on one source is logged and does not stop the others.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    logging.debug('%s: Found %s, collecting system logs...',
                  prefix, self._LAB_MACHINE_FILE)
    for src in server_utils.parse_simple_config(self._LOGS_TO_COLLECT_FILE):
        try:
            if not self.path_exists(src):
                continue
            logging.debug('%s: Collecting %s...', prefix, src)
            target = server_utils.concat_path_except_last(
                    crashlogs_dir, src)
            self.collect_logs(src, target, False)
            logging.debug('%s: %s is collected', prefix, src)
        except Exception as err:
            # Best effort only: report and move on to the next source.
            logging.error('%s: Failed to collect %s: %s',
                          prefix, src, err)
1316
1317
def close(self):
    """Disconnect every known RPC proxy, then close the parent host."""
    # Tear down tunnels/servers first; the parent close comes last.
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001321
1322
def get_power_supply_info(self):
    """Get the output of power_supply_info.

    power_supply_info outputs the info of each power supply, e.g.,
    Device: Line Power
      online: no
      type: Mains
      voltage (V): 0
      current (A): 0
    Device: Battery
      state: Discharging
      percentage: 95.9276
      technology: Li-ion

    Above output shows two devices, Line Power and Battery, with details
    of each device listed. This function parses the output into a
    dictionary, with key being the device name, and value being a
    dictionary of details of the device info.

    Each line is split on its first colon only, so a value that itself
    contains a colon (for example a time such as `1:23:45`) is kept
    intact. Lines without any colon are ignored, as are properties
    that appear before the first `Device:` line.

    @return: The dictionary of power_supply_info, e.g.,
             {'Line Power': {'online': 'yes', 'type': 'main'},
              'Battery': {'vendor': 'xyz', 'percentage': '100'}}
    """
    result = self.run('power_supply_info').stdout.strip()
    info = {}
    device_name = None
    device_info = {}
    for line in result.split('\n'):
        # maxsplit=1: only the first colon separates key from value.
        pair = [v.strip() for v in line.split(':', 1)]
        if len(pair) != 2:
            continue
        if pair[0] == 'Device':
            # A new device starts; flush the one collected so far.
            if device_name:
                info[device_name] = device_info
            device_name = pair[1]
            device_info = {}
        else:
            device_info[pair[0]] = pair[1]
    # Flush the last device, which has no following `Device:` line.
    if device_name and device_name not in info:
        info[device_name] = device_info
    return info
1364
1365
def get_battery_percentage(self):
    """Get the battery percentage.

    @return: The percentage of battery level, value range from 0-100.
             Return None if the battery info cannot be retrieved, i.e.
             the 'Battery' device or its 'percentage' entry is missing,
             or the reported value is not a number.
    """
    try:
        info = self.get_power_supply_info()
        logging.info(info)
        return float(info['Battery']['percentage'])
    # Must be a tuple: `except KeyError, ValueError:` would catch only
    # KeyError and bind the exception instance to the name ValueError.
    except (KeyError, ValueError):
        return None
1378
1379
def is_ac_connected(self):
    """Check if the dut has power adapter connected and charging.

    @return: True if the 'Line Power' device reports 'online' as
             'yes'; False otherwise, including when either entry is
             missing from the power supply info.
    """
    try:
        line_power = self.get_power_supply_info()['Line Power']
        online = line_power['online']
    except KeyError:
        return False
    return online == 'yes'
1390
1391
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the host's AFE record carries the
    _RPM_OUTLET_CHANGED attribute. Otherwise the host is powered on
    and the attribute is cleared.

    If powering on fails, the failure is logged and reported through
    es_utils, and the battery level decides what happens next:
      - battery level known and below 50%: the exception is re-raised;
      - otherwise no exception is raised, and if AC power is connected
        the attribute is cleared as well.

    @raises rpm_client.RemotePowerException: if power-on failed and the
            battery level is known to be below 50%.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts or self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return
    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
        afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                               hostname=self.hostname)
    except rpm_client.RemotePowerException:
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
        es_utils.ESMetadata().post(
                type_str='RPM_poweron_failure',
                metadata={'hostname': self.hostname})

        battery_percentage = self.get_battery_percentage()
        # Compare against None explicitly: a plain truthiness test
        # would treat a fully drained battery (0.0) as "unknown" and
        # skip the re-raise in exactly the case where it matters most.
        if battery_percentage is not None and battery_percentage < 50:
            raise
        elif self.is_ac_connected():
            logging.info('The device has power adapter connected and '
                         'charging. No need to try to turn RPM on '
                         'again.')
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        logging.info('Battery level is now at %s%%. The device may '
                     'still have enough power to run test, so no '
                     'exception will be raised.', battery_percentage)
1424
Simran Basi5e6339a2013-03-21 11:34:32 -07001425
beepsc87ff602013-07-31 21:53:00 -07001426 def _is_factory_image(self):
1427 """Checks if the image on the DUT is a factory image.
1428
1429 @return: True if the image on the DUT is a factory image.
1430 False otherwise.
1431 """
1432 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1433 return result.exit_status == 0
1434
1435
def _restart_ui(self):
    """Restart the Chrome UI and wait for Chrome to be ready.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    factory_image = self._is_factory_image()
    if factory_image:
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette): The command to stop/start the ui job
    # should live inside cros_ui, too. However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).
    #
    # The login prompt state is captured before the restart and is
    # handed to wait_for_chrome_ready() afterwards.
    login_state = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(login_state, self)
beepsc87ff602013-07-31 21:53:00 -07001455
1456
def cleanup(self):
    """Clean up the host after a job.

    Removes CLEANUP_LOGS_PAUSED_FILE, then restarts the UI. If the UI
    cannot be restarted, falls back to the parent class cleanup.
    Finally, if the host has power control, makes sure power is on.
    """
    paused_file = client_constants.CLEANUP_LOGS_PAUSED_FILE
    self.run('rm -f %s' % paused_file)

    restart_errors = (error.AutotestRunError, error.AutoservRunError,
                      FactoryImageCheckerException)
    try:
        self._restart_ui()
    except restart_errors:
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001470
1471
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for
    5 seconds. This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    Defaults are filled in for 'reboot_cmd' and 'fastsync' when the
    caller did not supply them, and 'board' is always set from
    get_board() before delegating to the parent class.

    @param dargs: Keyword arguments forwarded to the parent reboot().
                  'reboot_timeout' (default 10) is the number of
                  seconds the default reboot command sleeps before
                  issuing `reboot -f`.
    """
    if 'reboot_cmd' not in dargs:
        force_after_secs = dargs.get('reboot_timeout', 10)
        dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
                               ' </dev/null >/dev/null 2>&1 &)' %
                               force_after_secs)
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times: take the board name, i.e.
    # 'daisy_spring', dropping everything up to and including the
    # first ':' if there is one.
    full_board = self.get_board()
    dargs['board'] = full_board.split(':', 1)[-1]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001496
1497
def suspend(self, **dargs):
    """Suspend the site host.

    'timeout' is always set to the suspend time. Unless the caller
    supplies 'suspend_cmd', a default command is built that programs
    the RTC wake alarm and then asks powerd to suspend.

    @param dargs: Keyword arguments forwarded to the parent suspend().
                  'suspend_time' (default 60) is the wake alarm
                  offset in seconds.
    """
    suspend_time = dargs.get('suspend_time', 60)
    dargs['timeout'] = suspend_time
    if 'suspend_cmd' not in dargs:
        steps = [
            # Writing 0 first, then the new relative alarm.
            'echo 0 > /sys/class/rtc/rtc0/wakealarm',
            'echo +%d > /sys/class/rtc/rtc0/wakealarm' % suspend_time,
            'powerd_dbus_suspend --delay=0 &',
        ]
        chained = ' && '.join(steps)
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' % chained)
    super(CrosHost, self).suspend(**dargs)
1511
1512
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if `status <service_name>` reports start/running,
             False otherwise.
    """
    status_cmd = 'status %s | grep start/running' % service_name
    matched = self.run(status_cmd).stdout.strip()
    return matched != ''
1522
1523
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. No crash-logs are waiting to be collected from this host.
     2. All conditions tested by the parent version of this
        function.
     3. Sufficient inodes and space in /mnt/stateful_partition.
     4. Sufficient space in /mnt/stateful_partition/encrypted, when
        that path exists.
     5. The 'system-services' upstart job is running.
     6. update_engine answers a simple status request (skipped on
        factory images).
     7. python on the host can import cPickle.

    @raises error.AutoservCrashLogCollectRequired: if crash-logs still
            need to be collected from this host.
    @raises error.AutoservError: if system services are not running.
    """
    # Check if a job was crashed on this host.
    # If yes, avoid verification until crash-logs are collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()

    stateful = '/mnt/stateful_partition'
    config = global_config.global_config
    kilo_inodes = config.get_config_value(
            'SERVER', 'kilo_inodes_required', type=int, default=100)
    self.check_inodes(stateful, kilo_inodes)
    gb_required = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float, default=20.0)
    self.check_diskspace(stateful, gb_required)

    # Not all targets build with encrypted stateful support.
    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    if self.path_exists(encrypted_stateful_path):
        gb_encrypted = config.get_config_value(
                'SERVER', 'gb_encrypted_diskspace_required', type=float,
                default=0.1)
        self.check_diskspace(encrypted_stateful_path, gb_encrypted)

    if not self.upstart_status('system-services'):
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001573
1574
def verify_hardware(self):
    """Verify hardware system of a Chrome OS system.

    Currently this only logs the following hardware conditions; it
    does not fail on any of them:
     1. Battery level.
     2. Is power adapter connected.
    """
    battery = self.get_battery_percentage()
    logging.info('Battery percentage: %s', battery)

    if self.is_ac_connected():
        adapter_state = 'has'
    else:
        adapter_state = 'does not have'
    logging.info('Device %s power adapter connected and charging.',
                 adapter_state)
1585
1586
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection
      every 180 seconds. In conjunction with ServerAliveCountMax
      ensures that if the connection dies, Autotest will bail out
      quickly. Originally tried 60 secs, but saw frequent job ABORTS
      where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line, without the target host name.
    """
    tuned_settings = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    tuned_flags = ' '.join('-o %s' % setting for setting in tuned_settings)
    return '/usr/bin/ssh -a -x %s %s %s %s -l %s -p %d' % (
            self._ssh_verbosity_flag, self._ssh_options, opts,
            tuned_flags, user, port)
Fang Deng96667ca2013-08-01 17:46:18 -07001627
1628
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process (a subprocess.Popen object).
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, proc.pid)
    return proc
1651
1652
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, if known. It
                       is recorded in the proxy map for use by
                       rpc_disconnect.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop whatever was previously registered for this remote port.
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    self._rpc_proxy_map[port] = (command_name, tunnel, remote_pid)
    return self._RPC_PROXY_URL % forwarded_port
1688
1689
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready; if it is not ready by timeout_seconds, the
    connection is torn down again and the error from the retry
    wrapper propagates to the caller.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'
    @param logfile Logfile to send output when running
        'command' argument.

    @return The XMLRPC client proxy.
    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    launch_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    launch_result = self.run(launch_cmd)
    # `echo $!` prints the pid of the backgrounded server.
    remote_pid = launch_result.stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        retry_delay = min(max(timeout_seconds / 20.0, 0.1), 1)
        retriable = (socket.error,
                     xmlrpclib.ProtocolError,
                     httplib.BadStatusLine)

        @retry.retry(retriable,
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=retry_delay)
        def ready_test():
            """Invoke the readiness method on the proxy."""
            getattr(proxy, ready_test_name)()

        # The flag (rather than an except clause) makes sure the
        # cleanup runs for any kind of failure, and that the original
        # error keeps propagating.
        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001774
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001775
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    The message and tag are placed inside double quotes on the
    `logger` command line without any further escaping.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    logger_cmd = 'logger -t "%s" "%s"' % (tag, message)
    self.run(logger_cmd)
1784
1785
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a
    remote jsonrpc server, as the only clients of this code are factory
    tests, for which the goofy system manager is built in to the image
    and starts when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name: there is no remote server for us to kill later.
    tunnel_url = self._setup_rpc(port, None)
    client = jsonrpclib.jsonrpc.ServerProxy(tunnel_url)

    logging.info('Established a jsonrpc connection through port %s.', port)
    return client
1816
1817
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @raises error.TestError if the remote server is still running
            after _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    try:
        server_name, tunnel, server_pid = self._rpc_proxy_map[port]
    except KeyError:
        # Never connected (or already disconnected): nothing to do.
        return

    if server_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % server_name, ignore_status=True)
        if server_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         server_name)
            began = time.time()
            while (time.time() - began <
                   self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                pgrep = self.run("pgrep -f '%s'" % server_name,
                                 ignore_status=True)
                live_pids = pgrep.stdout.split()
                if server_pid not in live_pids:
                    logging.info('Shut down RPC server.')
                    break
                time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
            else:
                # The loop ran out of time without seeing the pid
                # disappear.
                raise error.TestError('Failed to shutdown RPC server %s' %
                                      server_name)

    still_running = tunnel.poll() is None
    if still_running:
        tunnel.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel.pid, tunnel.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001872
1873
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() deletes entries from _rpc_proxy_map, so iterate
    # over an explicit snapshot of the ports rather than relying on
    # keys() returning a list copy.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001878
1879
def poor_mans_rpc(self, fun):
    """
    Build a shell command that calls a client utils function on the host.

    The command changes into the bin directory of the installed
    autotest client and runs python to print `utils.<fun>`. This
    method only builds the command; it does not run it.

    @param fun function call expression in the client utils namespace.
    @return the shell command string.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    pieces = [
        'cd %s/bin; ' % autodir,
        'python -c "import common; import utils;',
        'print utils.%s"' % fun,
    ]
    return ''.join(pieces)
1891
1892
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    # A ping return value of 0 means the host answered.
    host_answered = utils.ping(self.hostname, tries=1, deadline=1) == 0
    differs = status ^ host_answered
    return not differs
1903
1904 def _ping_wait_for_status(self, status, timeout):
1905 """Wait for the host to have a given status (UP or DOWN).
1906
1907 Status is checked by polling. Polling will not last longer
1908 than the number of seconds in `timeout`. The polling
1909 interval will be long enough that only approximately
1910 _PING_WAIT_COUNT polling cycles will be executed, subject
1911 to a maximum interval of about one minute.
1912
1913 @param status Waiting will stop immediately if `ping` of the
1914 host returns this status.
1915 @param timeout Poll for at most this many seconds.
1916 @return True iff the host status from `ping` matched the
1917 requested status at the time of return.
1918
1919 """
1920 # _ping_check_status() takes about 1 second, hence the
1921 # "- 1" in the formula below.
1922 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1923 end_time = time.time() + timeout
1924 while time.time() <= end_time:
1925 if self._ping_check_status(status):
1926 return True
1927 if poll_interval > 0:
1928 time.sleep(poll_interval)
1929
1930 # The last thing we did was sleep(poll_interval), so it may
1931 # have been too long since the last `ping`. Check one more
1932 # time, just to be sure.
1933 return self._ping_check_status(status)
1934
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001950
def ping_wait_down(self, timeout):
    """Wait for the host to stop answering `ping`.

    This can serve as a slightly faster version of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001964
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    The "is asleep" check is a ping check, so it can't tell a
    sleeping system from one that is powered off.  To confirm the
    unit really was asleep, force a resume and then call
    `test_wait_for_resume()`.

    A test is expected to call this as part of a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host
                         sleep; defaults to `self.SLEEP_TIMEOUT`.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    limit = self.SLEEP_TIMEOUT if sleep_timeout is None else sleep_timeout
    if self.ping_wait_down(timeout=limit):
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' % limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001995
1996
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host entered sleep mode.  A `TestFail` exception is
    raised if the boot id has changed once the host is back up.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host
                          up; defaults to `self.RESUME_TIMEOUT`.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    limit = self.RESUME_TIMEOUT if resume_timeout is None else resume_timeout

    if not self.wait_up(timeout=limit):
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
            limit)

    # A changed boot id means the host rebooted instead of resuming.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
            'client rebooted, but sleep was expected'
            ' (old boot %s, new boot %s)'
            % (old_boot_id, current_boot_id))
2031
2032
def test_wait_for_shutdown(self, shutdown_timeout=None):
    """Wait for the client to shut down.

    The "has shut down" check is a ping check, so it can't tell a
    powered-off system from one that is merely asleep.  To confirm
    the unit really was down, force a boot and then call
    test_wait_for_boot().

    A test is expected to call this as part of a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @param shutdown_timeout time limit in seconds to allow the host
                            down; defaults to `self.SHUTDOWN_TIMEOUT`.
    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    limit = (self.SHUTDOWN_TIMEOUT if shutdown_timeout is None
             else shutdown_timeout)
    if self.ping_wait_down(timeout=limit):
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' %
        limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002062
2063
def test_wait_for_boot(self, old_boot_id=None, boot_timeout=None):
    """Wait for the client to boot from cold power.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to shutting down.  A
    `TestFail` exception is raised if the boot id does not
    change.  The boot id test is omitted if `old_boot_id` is not
    specified.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.
    @param boot_timeout time limit in seconds to allow the host up;
                        defaults to `self.REBOOT_TIMEOUT`.  This
                        mirrors the timeout parameters of the other
                        `test_wait_for_*` methods.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if boot_timeout is None:
        boot_timeout = self.REBOOT_TIMEOUT

    if not self.wait_up(timeout=boot_timeout):
        raise error.TestFail(
            'client failed to reboot after %d seconds' %
            boot_timeout)
    elif old_boot_id:
        # An unchanged boot id means the host never actually rebooted.
        if self.get_boot_id() == old_boot_id:
            raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07002093
2094
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not match
            CrosHost._RPM_HOSTNAME_REGEX, or if it falls in one of the
            lab/row/rack ranges that are explicitly disabled below.
            Otherwise the regular expression MatchObject is returned.
    """
    match = re.match(CrosHost._RPM_HOSTNAME_REGEX, hostname)
    if match is None:
        return None
    try:
        lab, row, rack = [int(match.group(index)) for index in (1, 3, 4)]
    except (TypeError, ValueError):
        # A group that is missing or non-numeric: none of the
        # exclusions below can be evaluated, so accept the match.
        return match

    # TODO(fdeng): temporarily disable support for duts
    # behind hydra2 in chromeos2, remove once
    # b/17612645 is fixed.
    behind_hydra2 = lab == 2 and 1 <= row <= 5 and 1 <= rack <= 7
    # TODO(fdeng): disable support for duts behind hydra3
    # for chromeos4, remove once b/15410667 is fixed
    behind_hydra3 = lab == 4 and (rack == 0 or row == 13)
    if behind_hydra2 or behind_hydra3:
        return None
    return match
Simran Basid5e5e272012-09-24 15:23:59 -07002124
2125
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return The result of `CrosHost.check_for_rpm_support()` for this
            host's hostname: a truthy regular expression MatchObject
            if the host is considered RPM-powered, None otherwise.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07002133
2134
def _set_power(self, state, power_method):
    """Sets the power to the host via RPM, Servo or manual.

    @param state Specifies which power state to set to DUT; 'ON' or
                 'OFF', compared case-insensitively.
    @param power_method Specifies which method of power control to
                        use.  Anything other than
                        self.POWER_CONTROL_SERVO or
                        self.POWER_CONTROL_MANUAL is handled as RPM.

    @exception TestError `state` is not an acceptable state.
    @exception TestFail RPM control was requested but has_power()
                        reports no RPM for this host.

    """
    ACCEPTABLE_STATES = ['ON', 'OFF']

    requested = state.upper()
    if requested not in ACCEPTABLE_STATES:
        raise error.TestError('State must be one of: %s.'
                               % (ACCEPTABLE_STATES,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; just give the operator time to
        # change the AC power by hand.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Every other power method is handled through the RPM.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, requested, timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002165
2166
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use.  Defaults to POWER_CONTROL_RPM; passed
                        through unchanged to `_set_power()`.

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002176
2177
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use.  Defaults to POWER_CONTROL_RPM; passed
                        through unchanged to `_set_power()`.

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
2187
2188
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    @param power_method Specifies which method of power control to
                        use.  Defaults to POWER_CONTROL_RPM.  For the
                        servo and manual methods the host is powered
                        off, left off for _POWER_CYCLE_TIMEOUT
                        seconds, then powered on.  Any other method
                        sends a single 'CYCLE' request via rpm_client.

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002204
2205
def get_platform(self):
    """Determine the correct platform label for this host.

    @returns a string representing this host's platform: the
             lower-cased leading component of the firmware id, with
             any 'google_' text removed.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; everything before the
    # first '.' is the platform id.
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0]
    lowered = platform_id.lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return lowered.replace('google_', '')
2219
2220
def get_architecture(self):
    """Determine the correct architecture label for this host.

    @returns the value of `arch()` from an initialized
             utils.Crossystem object for this host.
    """
    system_info = utils.Crossystem(self)
    system_info.init()
    architecture = system_info.arch()
    return architecture
2229
2230
def get_chrome_version(self):
    """Gets the Chrome version number and milestone as strings.

    Runs client_constants.CHROME_VERSION_COMMAND on the host and
    passes its output to utils.parse_chrome_version().

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
            current Chrome version number as a string (in the form
            "W.X.Y.Z") and "milestone" is the first component of the
            version number (the "W" from "W.X.Y.Z").  If the version
            number cannot be parsed in the "W.X.Y.Z" format, the
            "chrome_ver" will be the full output of the version
            command and the milestone will be the empty string.

    """
    result = self.run(client_constants.CHROME_VERSION_COMMAND)
    raw_version = result.stdout
    return utils.parse_chrome_version(raw_version)
2246
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    @returns a string representing this host's board:
             ds_constants.BOARD_PREFIX followed by the board name
             derived from CHROMEOS_RELEASE_BOARD in /etc/lsb-release.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    raw_board = lsb_release['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}.  The board
    # name may also begin with 'x86-' which we need to keep, so such
    # boards retain their first two dash-separated components.
    components = raw_board.split('-')
    if 'x86' in raw_board:
        return label_format % '-'.join(components[0:2])
    return label_format % components[0]
Simran Basic6f1f7a2012-10-16 10:47:46 -07002263
2264
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    Runs the 'get_board_with_frequency_and_memory()' call on the host
    through self.poor_mans_rpc() and labels its trimmed output.

    @returns a more detailed string representing the board. Examples are
    butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    board = self.run(self.poor_mans_rpc(
            'get_board_with_frequency_and_memory()')).stdout
    # Use the bound method: the unbound str.strip(board) raises
    # TypeError when stdout is not exactly a `str` (e.g. unicode).
    return 'board_freq_mem:%s' % board.strip()
2276
2277
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a light sensor.

    Searches self._LIGHTSENSOR_SEARCH_DIR on the host (following
    symlinks, to a depth of 4) for any of the file names in
    self._LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    wanted_files = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, wanted_files)
    try:
        # Run the search cmd following the symlinks. Stderr_tee is set to
        # None as there can be a symlink loop, but the command will still
        # execute correctly with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the possible
        # lightsensor files existed.
        return None
    else:
        return 'lightsensor'
2297
2298
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    else:
        # test exited with a return code of 0.
        return 'bluetooth'
2314
2315
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    Runs the 'get_gpu_family()' call on the host through
    self.poor_mans_rpc() and labels its trimmed output.

    @returns a string representing the gpu family. Examples are mali, tegra,
    pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    gpu_family = self.run(self.poor_mans_rpc('get_gpu_family()')).stdout
    # Use the bound method: the unbound str.strip(gpu_family) raises
    # TypeError when stdout is not exactly a `str` (e.g. unicode).
    return 'gpu_family:%s' % gpu_family.strip()
2326
2327
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    # A host counts as ARM when 'arm' appears anywhere in the
    # lower-cased `uname -a` output.
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2343
2344
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) don't return any strings, since no tests depend on
    those.
    """
    cmd = 'mosys ec info'
    # The output should look like these, so that the last field should
    # match our EC version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs will look like these:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # And some systems don't have ECs at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    version_match = re.search(regexp, ecinfo.stdout)
    if version_match is None:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", version_match.group(1))
    return 'ec:cros'
2382
2383
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or None, if no accelerometers.
    """
    # Without ectool there is no way to query the EC at all.
    ectool_lookup = self.run('which ectool', ignore_status=True)
    if ectool_lookup.exit_status:
        logging.info("No ectool cmd found, assuming no EC accelerometers")
        return None

    # The EC itself must understand the motionsense command.
    motionsense = self.run('ectool motionsense', ignore_status=True)
    if motionsense.exit_status:
        logging.info("EC does not support motionsense command "
                     "assuming no EC accelerometers")
        return None

    # The first output line of 'motionsense active' is "0" when the
    # EC motion sensors are inactive.
    active_lines = self.run('ectool motionsense active').stdout.split('\n')
    if active_lines[0] == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2414
2415
@label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
             has a Chameleon or None if it has not.
    """
    if not self._chameleon_host:
        return None
    specific_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', specific_label]
2428
2429
@label_decorator('audio_loopback_dongle')
def has_loopback_dongle(self):
    """Determine if an audio loopback dongle is plugged to this host.

    A dongle is considered present when the output of
    'cras_test_client --dump_s' shows both a HEADPHONE node and a MIC
    node as plugged, according to cras_utils.node_type_is_plugged().

    @returns 'audio_loopback_dongle' when there is an audio loopback dongle
                                     plugged to this host.
             None                    when there is no audio loopback dongle
                                     plugged to this host.
    """
    dump = self.run(command='cras_test_client --dump_s',
                    ignore_status=True)
    server_info = dump.stdout
    if not cras_utils.node_type_is_plugged('HEADPHONE', server_info):
        return None
    if not cras_utils.node_type_is_plugged('MIC', server_info):
        return None
    return 'audio_loopback_dongle'
2446
2447
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when `mosys psu type` reports 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # The psu command for mosys is not included for all platforms. The
        # assumption is that the device will have a battery if the command
        # is not found.
        return 'power:battery'

    reported_type = psu.stdout.strip()
    return None if reported_type == 'unknown' else 'power:%s' % reported_type
2472
2473
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = '/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or something
        # else. We can assume that MMC is always an internal device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Named device_type (not `type`) to avoid shadowing the builtin.
        device_type = self.run(command=type_cmd, ignore_status=True)
        if device_type.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        if device_type.stdout.strip() == 'MMC':
            return 'storage:mmc'
        # An mmcblk device of any other type cannot match the SCSI
        # pattern below, so it is "something else".
        return None

    scsi_pattern = '/dev/sd[a-z]+'
    # Match against the stripped device path, like the MMC check above;
    # raw stdout could carry leading whitespace and fail to match.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = str('cat /sys/block/%s/queue/rotational'
                         % rootdev_base)
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        # Any value other than '0' or '1' yields None.
        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2546
2547
@label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    If the host has a working servo attached, it should have a servo label.
    The decision is made purely on whether self._servo_host is set.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2558
2559
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect, or
             an empty list if either command run here fails.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
        # Raw string: '\w' in a plain literal is an invalid escape
        # sequence on newer Python versions.
        return re.findall(r'^Detected label: (\w+)$', result, re.M)
    except error.AutoservRunError:
        # The tool is not installed.
        return []
2581
2582
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """ Determine if a board under test is supported for video glitch
    detection tests.

    A board is supported when its name (the board label without
    ds_constants.BOARD_PREFIX) is a section name in
    client/cros/video/device_spec.conf under common.autotest_dir.

    @return: 'video_glitch_detection' if board is supported, None otherwise.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
            common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # something went wrong while parsing the conf file
        # NOTE: the base exception class is `ConfigParser.Error`; the
        # module has no lowercase `error` attribute, so the previous
        # handler raised AttributeError instead of catching this.
        return None

    supported_boards = parser.sections()
    return 'video_glitch_detection' if dut in supported_boards else None
2605
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    Each device type is queried with
    '/opt/google/input/inputcontrol --names -t <type>'; a type is
    reported when that command prints any output.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
        depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device_type for device_type in ('touchpad', 'touchscreen')
            if self.run(input_cmd % device_type).stdout]
2620
2621
mussa584b4462014-06-20 15:13:28 -07002622
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions
    in self._LABEL_FUNCTIONS.

    A label function may return a single string, a list of strings, or
    a falsy value (no label).  A function that raises is logged and
    ignored, so one broken detector cannot hide the other labels.

    @return A flat list of all label strings that were detected.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            # Deliberately broad: label detection is best-effort.
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            label = None
        if not label:
            continue
        # isinstance() rather than `type(x) is T`, so that subclasses of
        # str and list are accepted instead of being silently dropped.
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, list):
            labels.extend(label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002644
2645
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    Looks up the root device with 'rootdev -s -d' and reads that
    device's /sys/block/<name>/removable flag.

    @return: True if the flag reads 1 (which this method treats as
             booted from usb), False otherwise.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    device_name = os.path.basename(root_device)
    flag_output = self.run('cat /sys/block/%s/removable' % device_name).stdout
    return int(flag_output.strip()) == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002655
2656
def read_from_meminfo(self, key):
    """Return the memory info from /proc/meminfo

    Greps /proc/meminfo on the host for `key` and extracts the first
    run of digits from the output.

    @param key: meminfo requested

    @return the memory value as an int

    """
    grep_output = self.run('grep %s /proc/meminfo' % key).stdout.strip()
    logging.debug('%s', grep_output)
    first_number = re.search(r'\d+', grep_output)
    return int(first_number.group(0))