blob: f6b25c6e2a0fd5f1bdcb2bb41fa3c9cdf235a1e9 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Michael Liangda8c60a2014-06-03 13:24:51 -070024from autotest_lib.client.common_lib.cros.graphite import stats
MK Ryu35d661e2014-09-25 17:44:10 -070025from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080026from autotest_lib.client.cros import cros_ui
MK Ryu35d661e2014-09-25 17:44:10 -070027from autotest_lib.server import autoserv_parser
28from autotest_lib.server import autotest
29from autotest_lib.server import constants
30from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070031from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070032from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050033from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070034from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070035from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070036from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080037from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070038from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080039from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070040
41
beeps32a63082013-08-22 14:02:29 -070042try:
43 import jsonrpclib
44except ImportError:
45 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070046
Fang Dengd1c2b732013-08-20 12:59:46 -070047
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
51
52
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a function as a label detector.

    The returned decorator appends the decorated function to
    `label_function_list` and, when both a label and a label list were
    supplied, appends `label` to `label_list`.  The decorated function
    itself is handed back unchanged.

    @param label_function_list: List that collects the label detecting
                                functions.
    @param label_list: List that collects the detectable label strings.
                       (Default: None)
    @param label: Label string detectable by the decorated function.
                  (Default: None)

    @returns the decorator described above.
    """
    def register(detector):
        """
        @param detector: The function to be recorded as a detector.

        @returns `detector`, unmodified.
        """
        label_function_list.append(detector)
        if label_list is None or not label:
            # Nothing to record beyond the function itself.
            return detector
        label_list.append(label)
        return detector
    return register
71
72
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    # Shared command line parser and retrying AFE RPC client; both are
    # created once, when the class body is evaluated.
    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.
    # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
    #   includes powerwash.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    SHUTDOWN_TIMEOUT = 5
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150
    INSTALL_TIMEOUT = 480
    POWERWASH_BOOT_TIMEOUT = 60

    # REBOOT_TIMEOUT: How long to wait for a reboot.
    #
    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # URL template for a local RPC proxy; '%d' is filled with a port number.
    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    # Set shutdown timeout to account for the time for restarting the UI.
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT

    # Comma separated 'rpm_recovery_boards' value from the CROS section of
    # the global config, split into a list of strings.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # TODO (sbasi) crbug.com/392548 - re-enable support for chromeos 4 once
    # the rpm work is done.
    # NOTE(review): inside the character class the '|' is a literal pipe
    # character, not an alternation -- confirm that is acceptable.
    _RPM_HOSTNAME_REGEX = ('chromeos[0-3|5-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
            'host[0-9]+')
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # Registries filled in through `label_decorator`: the detecting
    # functions and the label strings they are able to detect.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    _RPM_OUTLET_CHANGED = 'outlet_changed'

    # URL pattern to download firmware image.
    _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
            'CROS', 'firmware_url_pattern', type=str)

    # File that has a list of directories to be collected
    _LOGS_TO_COLLECT_FILE = os.path.join(
            common.client_dir, 'common_lib', 'logs_to_collect')

    # Prefix of logging message w.r.t. crash collection
    _CRASHLOGS_PREFIX = 'collect_crashlogs'

    # Time duration waiting for host up/down check
    _CHECK_HOST_UP_TIMEOUT_SECS = 15

    # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
    # might not be completely done deep through the hardware when the machine
    # is powered down right after the command returns.
    # We should wait for a few seconds to let it finish. Fingers crossed.
    _SAFE_WAIT_SECS = 10
192
J. Richard Barnette964fba02012-10-24 17:34:29 -0700193 @staticmethod
beeps46dadc92013-11-07 14:07:10 -0800194 def check_host(host, timeout=10):
195 """
196 Check if the given host is a chrome-os host.
197
198 @param host: An ssh host representing a device.
199 @param timeout: The timeout for the run command.
200
201 @return: True if the host device is chromeos.
202
beeps46dadc92013-11-07 14:07:10 -0800203 """
204 try:
Christopher Wiley1ea80942014-02-26 16:45:08 -0800205 result = host.run('grep -q CHROMEOS /etc/lsb-release && '
Simran Basie5f7ae42014-06-26 15:44:06 -0700206 '! which adb >/dev/null 2>&1 && '
207 '! grep -q moblab /etc/lsb-release',
Christopher Wileyfc3eac02013-11-21 16:24:57 -0800208 ignore_status=True, timeout=timeout)
beeps46dadc92013-11-07 14:07:10 -0800209 except (error.AutoservRunError, error.AutoservSSHTimeout):
210 return False
211 return result.exit_status == 0
212
213
214 @staticmethod
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800215 def _extract_arguments(args_dict, key_subset):
216 """Extract options from `args_dict` and return a subset result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800217
218 Take the provided dictionary of argument options and return
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800219 a subset that represent standard arguments needed to construct
220 a test-assistant object (chameleon or servo) for a host. The
221 intent is to provide standard argument processing from
222 run_remote_tests for tests that require a test-assistant board
223 to operate.
224
225 @param args_dict Dictionary from which to extract the arguments.
226 @param key_subset Tuple of keys to extract from the args_dict, e.g.
227 ('servo_host', 'servo_port').
228 """
229 result = {}
230 for arg in key_subset:
231 if arg in args_dict:
232 result[arg] = args_dict[arg]
233 return result
234
235
236 @staticmethod
237 def get_chameleon_arguments(args_dict):
238 """Extract chameleon options from `args_dict` and return the result.
239
240 Recommended usage:
241 ~~~~~~~~
242 args_dict = utils.args_to_dict(args)
243 chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
244 host = hosts.create_host(machine, chameleon_args=chameleon_args)
245 ~~~~~~~~
246
247 @param args_dict Dictionary from which to extract the chameleon
248 arguments.
249 """
250 return CrosHost._extract_arguments(
251 args_dict, ('chameleon_host', 'chameleon_port'))
252
253
254 @staticmethod
255 def get_servo_arguments(args_dict):
256 """Extract servo options from `args_dict` and return the result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800257
258 Recommended usage:
259 ~~~~~~~~
260 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700261 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800262 host = hosts.create_host(machine, servo_args=servo_args)
263 ~~~~~~~~
264
265 @param args_dict Dictionary from which to extract the servo
266 arguments.
267 """
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800268 return CrosHost._extract_arguments(
269 args_dict, ('servo_host', 'servo_port'))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700270
J. Richard Barnette964fba02012-10-24 17:34:29 -0700271
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800272 def _initialize(self, hostname, chameleon_args=None, servo_args=None,
273 ssh_verbosity_flag='', ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700274 *args, **dargs):
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800275 """Initialize superclasses, |self.chameleon|, and |self.servo|.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700276
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800277 This method checks whether a chameleon/servo (aka
278 test-assistant objects) is required by checking whether
279 chameleon_args/servo_args is None. This method will only
280 attempt to create the test-assistant object when it is
281 required by the test.
Fang Deng5d518f42013-08-02 14:04:32 -0700282
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800283 For creating the test-assistant object, there are three
284 possibilities: First, if the host is a lab system known to have
285 a test-assistant board, we connect to that board unconditionally.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700286 Second, if we're called from a control file that requires
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800287 test-assistant features for testing, it will pass settings from
288 the arguments, like `servo_host`, `servo_port`. If neither of
289 these cases apply, the test-assistant object will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700290
291 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700292 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700293 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700294 # self.env is a dictionary of environment variable settings
295 # to be exported for commands run on the host.
296 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
297 # errors that might happen.
298 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700299 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700300 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700301 self._ssh_options = ssh_options
Fang Deng5d518f42013-08-02 14:04:32 -0700302 # TODO(fdeng): We need to simplify the
303 # process of servo and servo_host initialization.
304 # crbug.com/298432
Dan Shi4d478522014-02-14 13:46:32 -0800305 self._servo_host = servo_host.create_servo_host(dut=self.hostname,
306 servo_args=servo_args)
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800307 # TODO(waihong): Do the simplication on Chameleon too.
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800308 self._chameleon_host = chameleon_host.create_chameleon_host(
309 dut=self.hostname, chameleon_args=chameleon_args)
310
Dan Shi4d478522014-02-14 13:46:32 -0800311 if self._servo_host is not None:
312 self.servo = self._servo_host.get_servo()
313 else:
314 self.servo = None
315
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800316 if self._chameleon_host:
Tom Wai-Hong Tameaee3402014-01-22 08:52:10 +0800317 self.chameleon = self._chameleon_host.create_chameleon_board()
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800318 else:
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800319 self.chameleon = None
Fang Deng5d518f42013-08-02 14:04:32 -0700320
321
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500322 def get_repair_image_name(self):
323 """Generate a image_name from variables in the global config.
324
325 @returns a str of $board-version/$BUILD.
326
327 """
328 stable_version = global_config.global_config.get_config_value(
329 'CROS', 'stable_cros_version')
330 build_pattern = global_config.global_config.get_config_value(
331 'CROS', 'stable_build_pattern')
332 board = self._get_board_from_afe()
333 if board is None:
334 raise error.AutoservError('DUT has no board attribute, '
335 'cannot be repaired.')
336 return build_pattern % (board, stable_version)
337
338
Scott Zawalski62bacae2013-03-05 10:40:32 -0500339 def _host_in_AFE(self):
340 """Check if the host is an object the AFE knows.
341
342 @returns the host object.
343 """
344 return self._AFE.get_hosts(hostname=self.hostname)
345
346
Chris Sosab76e0ee2013-05-22 16:55:41 -0700347 def lookup_job_repo_url(self):
348 """Looks up the job_repo_url for the host.
349
350 @returns job_repo_url from AFE or None if not found.
351
352 @raises KeyError if the host does not have a job_repo_url
353 """
354 if not self._host_in_AFE():
355 return None
356
357 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700358 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
359 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700360
361
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500362 def clear_cros_version_labels_and_job_repo_url(self):
363 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500364 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400365 return
366
Scott Zawalski62bacae2013-03-05 10:40:32 -0500367 host_list = [self.hostname]
368 labels = self._AFE.get_labels(
369 name__startswith=ds_constants.VERSION_PREFIX,
370 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800371
Scott Zawalski62bacae2013-03-05 10:40:32 -0500372 for label in labels:
373 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500374
beepscb6f1e22013-06-28 19:14:10 -0700375 self.update_job_repo_url(None, None)
376
377
378 def update_job_repo_url(self, devserver_url, image_name):
379 """
380 Updates the job_repo_url host attribute and asserts it's value.
381
382 @param devserver_url: The devserver to use in the job_repo_url.
383 @param image_name: The name of the image to use in the job_repo_url.
384
385 @raises AutoservError: If we failed to update the job_repo_url.
386 """
387 repo_url = None
388 if devserver_url and image_name:
389 repo_url = tools.get_package_url(devserver_url, image_name)
390 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500391 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700392 if self.lookup_job_repo_url() != repo_url:
393 raise error.AutoservError('Failed to update job_repo_url with %s, '
394 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500395
396
Dan Shie9309262013-06-19 22:50:21 -0700397 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400398 """Add cros_version labels and host attribute job_repo_url.
399
400 @param image_name: The name of the image e.g.
401 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700402
Scott Zawalskieadbf702013-03-14 09:23:06 -0400403 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500404 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400405 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500406
Scott Zawalskieadbf702013-03-14 09:23:06 -0400407 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700408 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500409
410 labels = self._AFE.get_labels(name=cros_label)
411 if labels:
412 label = labels[0]
413 else:
414 label = self._AFE.create_label(name=cros_label)
415
416 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700417 self.update_job_repo_url(devserver_url, image_name)
418
419
beepsdae65fd2013-07-26 16:24:41 -0700420 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700421 """
422 Make sure job_repo_url of this host is valid.
423
joychen03eaad92013-06-26 09:55:21 -0700424 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700425 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
426 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
427 download and extract it. If the devserver embedded in the url is
428 unresponsive, update the job_repo_url of the host after staging it on
429 another devserver.
430
431 @param job_repo_url: A url pointing to the devserver where the autotest
432 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700433 @param tag: The tag from the server job, in the format
434 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700435
436 @raises DevServerException: If we could not resolve a devserver.
437 @raises AutoservError: If we're unable to save the new job_repo_url as
438 a result of choosing a new devserver because the old one failed to
439 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700440 @raises urllib2.URLError: If the devserver embedded in job_repo_url
441 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700442 """
443 job_repo_url = self.lookup_job_repo_url()
444 if not job_repo_url:
445 logging.warning('No job repo url set on host %s', self.hostname)
446 return
447
448 logging.info('Verifying job repo url %s', job_repo_url)
449 devserver_url, image_name = tools.get_devserver_build_from_package_url(
450 job_repo_url)
451
beeps0c865032013-07-30 11:37:06 -0700452 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700453
454 logging.info('Staging autotest artifacts for %s on devserver %s',
455 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700456
457 start_time = time.time()
beepscb6f1e22013-06-28 19:14:10 -0700458 ds.stage_artifacts(image_name, ['autotest'])
beeps687243d2013-07-18 15:29:27 -0700459 stage_time = time.time() - start_time
460
461 # Record how much of the verification time comes from a devserver
462 # restage. If we're doing things right we should not see multiple
463 # devservers for a given board/build/branch path.
464 try:
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800465 board, build_type, branch = server_utils.ParseBuildName(
beeps687243d2013-07-18 15:29:27 -0700466 image_name)[:3]
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800467 except server_utils.ParseBuildNameException:
beeps687243d2013-07-18 15:29:27 -0700468 pass
469 else:
beeps0c865032013-07-30 11:37:06 -0700470 devserver = devserver_url[
Chris Sosa65425082013-10-16 13:26:22 -0700471 devserver_url.find('/') + 2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700472 stats_key = {
473 'board': board,
474 'build_type': build_type,
475 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700476 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700477 }
478 stats.Gauge('verify_job_repo_url').send(
479 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
480 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700481
Scott Zawalskieadbf702013-03-14 09:23:06 -0400482
Dan Shi0f466e82013-02-22 15:44:58 -0800483 def _try_stateful_update(self, update_url, force_update, updater):
484 """Try to use stateful update to initialize DUT.
485
486 When DUT is already running the same version that machine_install
487 tries to install, stateful update is a much faster way to clean up
488 the DUT for testing, compared to a full reimage. It is implemeted
489 by calling autoupdater.run_update, but skipping updating root, as
490 updating the kernel is time consuming and not necessary.
491
492 @param update_url: url of the image.
493 @param force_update: Set to True to update the image even if the DUT
494 is running the same version.
495 @param updater: ChromiumOSUpdater instance used to update the DUT.
496 @returns: True if the DUT was updated with stateful update.
497
498 """
J. Richard Barnette3f731032014-04-07 17:42:59 -0700499 # TODO(jrbarnette): Yes, I hate this re.match() test case.
500 # It's better than the alternative: see crbug.com/360944.
501 image_name = autoupdater.url_to_image_name(update_url)
502 release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
503 if not re.match(release_pattern, image_name):
504 return False
Dan Shi0f466e82013-02-22 15:44:58 -0800505 if not updater.check_version():
506 return False
507 if not force_update:
508 logging.info('Canceling stateful update because the new and '
509 'old versions are the same.')
510 return False
511 # Following folders should be rebuilt after stateful update.
512 # A test file is used to confirm each folder gets rebuilt after
513 # the stateful update.
514 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
515 test_file = '.test_file_to_be_deleted'
516 for folder in folders_to_check:
517 touch_path = os.path.join(folder, test_file)
518 self.run('touch %s' % touch_path)
519
520 if not updater.run_update(force_update=True, update_root=False):
521 return False
522
523 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700524 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800525 check_file_cmd = 'test -f %s; echo $?'
526 for folder in folders_to_check:
527 test_file_path = os.path.join(folder, test_file)
528 result = self.run(check_file_cmd % test_file_path,
529 ignore_status=True)
530 if result.exit_status == 1:
531 return False
532 return True
533
534
J. Richard Barnette7275b612013-06-04 18:13:11 -0700535 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800536 """After the DUT is updated, confirm machine_install succeeded.
537
538 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700539 @param expected_kernel: kernel expected to be active after reboot,
540 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800541
542 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700543 # Touch the lab machine file to leave a marker that
544 # distinguishes this image from other test images.
545 # Afterwards, we must re-run the autoreboot script because
546 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800547 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800548 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700549 updater.verify_boot_expectations(
550 expected_kernel, rollback_message=
551 'Build %s failed to boot on %s; system rolled back to previous'
552 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700553 # Check that we've got the build we meant to install.
554 if not updater.check_version_to_confirm_install():
555 raise autoupdater.ChromiumOSError(
556 'Failed to update %s to build %s; found build '
557 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700558 updater.update_version,
559 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800560
561
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700562 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400563 """Stage a build on a devserver and return the update_url.
564
565 @param image_name: a name like lumpy-release/R27-3837.0.0
566 @returns an update URL like:
567 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
568 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700569 if not image_name:
570 image_name = self.get_repair_image_name()
571 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400572 devserver = dev_server.ImageServer.resolve(image_name)
573 devserver.trigger_download(image_name, synchronous=False)
574 return tools.image_url_pattern() % (devserver.url(), image_name)
575
576
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700577 def stage_image_for_servo(self, image_name=None):
578 """Stage a build on a devserver and return the update_url.
579
580 @param image_name: a name like lumpy-release/R27-3837.0.0
581 @returns an update URL like:
582 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
583 """
584 if not image_name:
585 image_name = self.get_repair_image_name()
586 logging.info('Staging build for servo install: %s', image_name)
587 devserver = dev_server.ImageServer.resolve(image_name)
588 devserver.stage_artifacts(image_name, ['test_image'])
589 return devserver.get_test_image_url(image_name)
590
591
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory build on a devserver and return its image URL.

    @param image_name: a name like <board>/4262.204.0

    @return: The URL built from `tools.factory_image_url_pattern()`,
             the devserver URL and image_name, eg:
             http://<devserver>/static/canary-channel/<board>/
             4262.204.0/factory_test/chromiumos_factory_image.bin
             `None` is returned, after logging an error, when
             image_name is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return None

    # The artifact name lives in the CROS section of the autotest config;
    # an empty value means there is nothing we could ask the devserver for.
    artifact_name = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not artifact_name:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    image_server.stage_artifacts(image_name, [artifact_name],
                                 archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
624
625
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install). Both are added back only after a successful update.
    Regardless of the update outcome, the installed autotest directory on
    the DUT (if any) is removed at the end.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is not set, the image requested on the command
            line (`self._parser.options.image`) is used: verbatim when it
            starts with 'http://', otherwise it is staged on a devserver
            first. If neither is set and repair is True we will install
            the stable image listed in global_config under
            CROS.stable_cros_version.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @raises autoupdater.ChromiumOSError if no update URL can be
            determined, or if after a full update the inactive kernel
            partition has a lower priority than the active one.

    """
    # Step 1: work out which URL to update from.
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        # A failed stateful update is not fatal: log it and fall through
        # to the full update below (`updated` is still False).
        logging.exception(e)
        logging.warning('Failed to stateful update DUT, force to update.')

    # Stays None when the stateful update succeeded, because the kernel
    # partitions are only inspected on the full update path.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('touch /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        # Restore the labels that were cleared before the update, now
        # pointing at the freshly installed image.
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    logging.debug('Cleaning up old autotest directories.')
    try:
        installed_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + installed_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700746
747
def _clear_fw_version_labels(self):
    """Detach every firmware version label from this host in the AFE."""
    # Ask only for labels that carry the firmware version prefix and are
    # currently attached to this host.
    attached_fw_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    for fw_label in attached_fw_labels:
        fw_label.remove_hosts(hosts=[self.hostname])
755
756
def _add_fw_version_label(self, build):
    """Add firmware version label to the machine.

    The label name is derived from the build with
    `provision.fw_version_to_label()`, created if necessary, and then
    attached to this host.

    @param build: Build of firmware.

    """
    fw_label = provision.fw_version_to_label(build)
    provision.ensure_label_exists(fw_label)
    # Look the label up by its exact name.  A prefix match could return a
    # different label that merely starts with the same text (for example
    # a version ending in 1.144 when 1.14 was asked for), and taking the
    # first result would then tag the host with the wrong firmware version.
    label = self._AFE.get_labels(name=fw_label)[0]
    label.add_hosts([self.hostname])
767
768
def firmware_install(self, build=None):
    """Install firmware to the DUT.

    The firmware artifact of the build is staged on a devserver, the
    firmware tarball is downloaded and unpacked into a temporary
    directory, and the EC and AP (BIOS) images are programmed through
    servo. The DUT is then reset and given `servo.BOOT_DELAY` seconds
    to boot.

    Once a host enters firmware_install its fw_version label will be
    removed. After the firmware is updated successfully, a new fw_version
    label will be added to the host.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.
                  If not set, the repair image name is used.

    @raises error.TestError if the host does not have a servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    # The AP image file name inside the tarball depends on the platform.
    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else: # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # The dts files are optional: a tarball without them must not
        # abort the install, hence ignore_status.
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # _add_fw_version_label() requires the build; calling it without
        # one raised TypeError after the firmware had already been
        # flashed, leaving the host without any fw_version label.
        self._add_fw_version_label(build)
    finally:
        # Always remove the downloaded tarball and unpacked images.
        tmpd.clean()
835
836
def show_update_engine_log(self):
    """Dump the update engine log by running `cat` on it on the host."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
Dan Shi10e992b2013-08-30 11:02:59 -0700841
842
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    The lookup is delegated to `server_utils.get_board_from_afe()`,
    which is given this host's name and AFE handle.

    @returns whatever `server_utils.get_board_from_afe()` returns;
             callers in this class treat a false value as "no board".
    """
    hostname, afe = self.hostname, self._AFE
    return server_utils.get_board_from_afe(hostname, afe)
Simran Basi833814b2013-01-29 13:13:43 -0800853
854
def get_build(self):
    """Retrieve the current build for this Host from the AFE.

    The lookup is delegated to `server_utils.get_build_from_afe()`,
    which is given this host's name and AFE handle.

    @returns whatever `server_utils.get_build_from_afe()` returns for
             this host.
    """
    hostname, afe = self.hostname, self._AFE
    return server_utils.get_build_from_afe(hostname, afe)
Richard Barnette82c35912012-11-20 10:09:10 -0800864
865
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500866 def _install_repair(self):
867 """Attempt to repair this host using upate-engine.
868
869 If the host is up, try installing the DUT with a stable
870 "repair" version of Chrome OS as defined in the global_config
871 under CROS.stable_cros_version.
872
Scott Zawalski62bacae2013-03-05 10:40:32 -0500873 @raises AutoservRepairMethodNA if the DUT is not reachable.
874 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500875
876 """
877 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500878 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500879 logging.info('Attempting to reimage machine to repair image.')
880 try:
881 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700882 except autoupdater.ChromiumOSError as e:
883 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500884 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500885 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500886
887
Dan Shi2c88eed2013-11-12 10:18:38 -0800888 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800889 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800890
Dan Shi9cc48452013-11-12 12:39:26 -0800891 update-engine may fail due to a bad image. In such case, powerwash
892 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800893
894 @raises AutoservRepairMethodNA if the DUT is not reachable.
895 @raises ChromiumOSError if the install failed for some reason.
896
897 """
898 if not self.is_up():
899 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
900
901 logging.info('Attempting to powerwash the DUT.')
902 self.run('echo "fast safe" > '
903 '/mnt/stateful_partition/factory_install_reset')
904 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
905 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800906 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800907 'reboot.')
908 raise error.AutoservRepairFailure(
909 'DUT failed to boot from powerwash after %d seconds' %
910 self.POWERWASH_BOOT_TIMEOUT)
911
912 logging.info('Powerwash succeeded.')
913 self._install_repair()
914
915
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """Re-install the OS on the DUT through servo.

    The sequence is:
    1) install a test image on the USB storage device attached to the
       Servo board and boot the DUT from it,
    2) run chromeos-install on the DUT, and then
    3) power cycle the DUT through servo and wait for it to come up.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.
    """
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    # The timer key embeds the timeout so that runs with different
    # timeouts are reported under different stats names.
    usb_boot_timer = stats.Timer(
            'servo_install.usb_boot_timeout_%s' % usb_boot_timeout)
    usb_boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    usb_boot_timer.stop()

    install_timer = stats.Timer(
            'servo_install.install_timeout_%s' % install_timeout)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    install_command = ('chromeos-install --yes --lab_preserve_logs=%s' %
                       self._LOGS_TO_COLLECT_FILE)
    self.run(install_command, timeout=install_timeout)
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()

    logging.info('Waiting for DUT to come back up.')
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
971
972
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700973 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500974 """Reinstall the DUT utilizing servo and a test image.
975
976 Re-install the OS on the DUT by:
977 1) installing a test image on a USB storage device attached to the Servo
978 board,
979 2) booting that image in recovery mode, and then
980 3) installing the image with chromeos-install.
981
Scott Zawalski62bacae2013-03-05 10:40:32 -0500982 @raises AutoservRepairMethodNA if the device does not have servo
983 support.
984
985 """
986 if not self.servo:
987 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
988 'DUT has no servo support.')
989
990 logging.info('Attempting to recovery servo enabled device with '
991 'servo_repair_reinstall')
992
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700993 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500994 self.servo_install(image_url)
995
996
997 def _servo_repair_power(self):
998 """Attempt to repair DUT using an attached Servo.
999
1000 Attempt to power on the DUT via power_long_press.
1001
1002 @raises AutoservRepairMethodNA if the device does not have servo
1003 support.
1004 @raises AutoservRepairFailure if the repair fails for any reason.
1005 """
1006 if not self.servo:
1007 raise error.AutoservRepairMethodNA('Repair Power NA: '
1008 'DUT has no servo support.')
1009
1010 logging.info('Attempting to recover servo enabled device by '
1011 'powering it off and on.')
1012 self.servo.get_power_state_controller().power_off()
1013 self.servo.get_power_state_controller().power_on()
1014 if self.wait_up(self.BOOT_TIMEOUT):
1015 return
1016
1017 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001018
1019
Richard Barnette82c35912012-11-20 10:09:10 -08001020 def _powercycle_to_repair(self):
1021 """Utilize the RPM Infrastructure to bring the host back up.
1022
1023 If the host is not up/repaired after the first powercycle we utilize
1024 auto fallback to the last good install by powercycling and rebooting the
1025 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001026
1027 @raises AutoservRepairMethodNA if the device does not support remote
1028 power.
1029 @raises AutoservRepairFailure if the repair fails for any reason.
1030
Richard Barnette82c35912012-11-20 10:09:10 -08001031 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001032 if not self.has_power():
1033 raise error.AutoservRepairMethodNA('Device does not support power.')
1034
Richard Barnette82c35912012-11-20 10:09:10 -08001035 logging.info('Attempting repair via RPM powercycle.')
1036 failed_cycles = 0
1037 self.power_cycle()
1038 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1039 failed_cycles += 1
1040 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001041 raise error.AutoservRepairFailure(
1042 'Powercycled host %s %d times; device did not come back'
1043 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001044 self.power_cycle()
1045 if failed_cycles == 0:
1046 logging.info('Powercycling was successful first time.')
1047 else:
1048 logging.info('Powercycling was successful after %d failures.',
1049 failed_cycles)
1050
1051
MK Ryu35d661e2014-09-25 17:44:10 -07001052 def _reboot_repair(self):
1053 """SSH to this host and reboot."""
1054 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1055 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1056 logging.info('Attempting repair via SSH reboot.')
1057 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1058
1059
def check_device(self):
    """Check if a device is ssh-able, and if so, clean and verify it.

    Runs `ssh_ping()`, `cleanup()` and `verify()` in that order; the
    first one that raises stops the sequence.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    for step in (self.ssh_ping, self.cleanup, self.verify):
        step()
1073
1074
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    It first tries to repair the servo host if the DUT has one but no
    usable servo yet, and attempts to collect crash logs.

    This escalates in order through the following procedures and verifies
    the status using `self.check_device()` after each of them. This is done
    until both the repair and the verifying step succeed.

    Escalation order of repair procedures from less intrusive to
    more intrusive repairs:
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try the auto-update re-install again.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    Every attempt is counted in stats as SUCCEEDED, RepairNA or FAILED,
    both globally and (when the board is known) per board.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.check_device()`; If that call fails the
    exception it raises is passed back to the caller.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.
    @raises ServoHostRepairTotalFailure if the repair process fails to
            fix the servo host if one is attached to the DUT.
    @raises AutoservSshPermissionDeniedError if it is unable
            to ssh to the servo host due to permission error.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as e:
            # A broken servo host only removes the servo based repair
            # options; it must not abort the repair of the DUT itself.
            logging.error('Could not create a healthy servo: %s', e)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._reboot_repair,
                    self._servo_repair_power,
                    self._powercycle_to_repair,
                    self._install_repair,
                    self._install_repair_with_powerwash,
                    self._servo_repair_reinstall]
    # One message per repair function that did not fix the DUT; reported
    # together if every function fails.
    errors = []
    board = self._get_board_from_afe()
    for repair_func in repair_funcs:
        try:
            repair_func()
            self.try_collect_crashlogs()
            # A repair only counts as successful if the DUT also passes
            # check_device(); otherwise fall through to the next method.
            self.check_device()
            stats.Counter(
                    '%s.SUCCEEDED' % repair_func.__name__).increment()
            if board:
                stats.Counter(
                        '%s.%s.SUCCEEDED' % (repair_func.__name__,
                                             board)).increment()
            return
        except error.AutoservRepairMethodNA as e:
            # The method does not apply to this DUT (e.g. no servo).
            stats.Counter(
                    '%s.RepairNA' % repair_func.__name__).increment()
            if board:
                stats.Counter(
                        '%s.%s.RepairNA' % (repair_func.__name__,
                                            board)).increment()
            logging.warning('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            # Any other failure, including a failing check_device().
            stats.Counter(
                    '%s.FAILED' % repair_func.__name__).increment()
            if board:
                stats.Counter(
                        '%s.%s.FAILED' % (repair_func.__name__,
                                          board)).increment()
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    # Only reached when no repair function returned successfully.
    stats.Counter('Full_Repair_Failed').increment()
    if board:
        stats.Counter(
                'Full_Repair_Failed.%s' % board).increment()
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001174
1175
def try_collect_crashlogs(self, check_host_up=True):
    """Collect crash logs from the host if it needs them and is up.

    Nothing is collected when the host has no need_crash_logs value, or
    when check_host_up is set and the host is not up. Every exception
    is logged and suppressed.

    @param check_host_up: Flag for checking host is up. Default is True.
    """
    try:
        crashed_job = self._need_crash_logs()
        if not crashed_job:
            return
        logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
                      crashed_job)
        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return
        self._collect_crashlogs(crashed_job)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', self._CRASHLOGS_PREFIX,
                      crashed_job)
    except Exception as e:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, e)
1199
1200
def _need_crash_logs(self):
    """Get the value of need_crash_logs attribute of this host.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    matches = self._AFE.get_host_attribute(
            constants.CRASHLOGS_HOST_ATTRIBUTE, hostname=self.hostname)
    # At most one such attribute is expected per host.
    assert len(matches) < 2
    if matches:
        return matches[0].value
    return None
1211
1212
def _collect_crashlogs(self, job_id):
    """Grab logs from the host where a job was crashed.

    If PRIOR_LOGS_DIR exists on the host, its content is collected.
    Otherwise, if the lab-machine marker (_LAB_MACHINE_FILE) exists on
    the host, the normal system logs are collected. If neither exists
    only a warning is logged.

    In every case a marker file holding the job id is written into the
    crash-logs directory, the need_crash_logs host attribute is cleared,
    and a collected PRIOR_LOGS_DIR is removed from the host.

    @param job_id: Id of the job that was crashed.
    """
    crashlogs_dir = crashcollect.get_crashinfo_dir(
            self, constants.CRASHLOGS_DEST_DIR_PREFIX)

    has_prior_logs = bool(
            self.path_exists(client_constants.PRIOR_LOGS_DIR))
    if has_prior_logs:
        self._collect_prior_logs(crashlogs_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(crashlogs_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # Crash collection is deliberately a one-time effort: the two
    # collectors above swallow their own errors, so the bookkeeping
    # below runs even when collection failed. This method is only
    # reached when the host is up (see try_collect_crashlogs()), where
    # collection is assumed to fail rarely, and it is unclear how often
    # a retry could be attempted without triggering another repair.

    # Create a marker file as soon as log collection is done.
    # Leave the job id to this marker for gs_offloader to consume.
    marker_path = os.path.join(crashlogs_dir, constants.CRASHLOGS_MARKER)
    with open(marker_path, 'a') as marker:
        marker.write('%s\n' % job_id)

    # Remove need_crash_logs attribute
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if not has_prior_logs:
        return

    logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                  client_constants.PRIOR_LOGS_DIR, self.hostname)
    self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
    # Wait for a few seconds to make sure the prior command is
    # done deep through storage.
    time.sleep(self._SAFE_WAIT_SECS)
1269
1270
def _collect_prior_logs(self, crashlogs_dir):
    """Grab prior logs that were stashed before re-installing a host.

    Failures are logged and never propagated to the caller.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prior_logs_dir = client_constants.PRIOR_LOGS_DIR
    logging.debug('%s: Found %s, collecting them...',
                  self._CRASHLOGS_PREFIX, prior_logs_dir)
    try:
        self.collect_logs(prior_logs_dir, crashlogs_dir, False)
        logging.debug('%s: %s is collected',
                      self._CRASHLOGS_PREFIX, prior_logs_dir)
    except Exception as e:
        logging.error('%s: Failed to collect %s: %s',
                      self._CRASHLOGS_PREFIX, prior_logs_dir, e)
1287
1288
def _collect_system_logs(self, crashlogs_dir):
    """Grab normal system logs from a host.

    The list of sources comes from running
    server_utils.parse_simple_config() on self._LOGS_TO_COLLECT_FILE.
    Sources that do not exist on the host are skipped.  A failure on
    one source is logged and does not stop the remaining sources from
    being collected.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    logging.debug('%s: Found %s, collecting system logs...',
                  prefix, self._LAB_MACHINE_FILE)
    log_sources = server_utils.parse_simple_config(
            self._LOGS_TO_COLLECT_FILE)
    for src in log_sources:
        try:
            if not self.path_exists(src):
                continue
            logging.debug('%s: Collecting %s...', prefix, src)
            dest = server_utils.concat_path_except_last(crashlogs_dir, src)
            self.collect_logs(src, dest, False)
            logging.debug('%s: %s is collected', prefix, src)
        except Exception as e:
            logging.error('%s: Failed to collect %s: %s', prefix, src, e)
1310
1311
def close(self):
    """Release the resources held by this host object.

    Disconnects every known RPC proxy and then runs the parent class's
    close().  rpc_disconnect() can raise (error.TestError when a remote
    RPC server refuses to shut down); the parent close() is therefore
    run from a `finally` clause so that it is never skipped.  The
    exception from the RPC teardown still propagates to the caller.
    """
    try:
        self.rpc_disconnect_all()
    finally:
        super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001315
1316
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE knows this host and the host carries
    the self._RPM_OUTLET_CHANGED attribute.  Otherwise the host is
    powered on and the attribute is cleared (set to None) afterwards,
    whether or not powering on succeeded.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup.
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
1335
1336
def _is_factory_image(self):
    """Checks if the image on the DUT is a factory image.

    The check is whether the regular file /root/.factory_test exists
    on the host.

    @return: True if the image on the DUT is a factory image.
             False otherwise.
    """
    # The shell test exits nonzero when the file is absent, so the exit
    # status is inspected instead of letting run() treat it as an error.
    return self.run('[ -f /root/.factory_test ]',
                    ignore_status=True).exit_status == 0
1345
1346
def _restart_ui(self):
    """Restart the Chrome UI.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette):  The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).

    # The login prompt state is sampled before the restart and handed
    # to wait_for_chrome_ready() afterwards.
    login_prompt_state = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(login_prompt_state, self)
beepsc87ff602013-07-31 21:53:00 -07001366
1367
def cleanup(self):
    """Clean up the host after a job.

    Steps, in order:
      1. Remove the file named by
         client_constants.CLEANUP_LOGS_PAUSED_FILE from the host.
      2. Restart the UI.  If that fails, fall back to the parent
         class's cleanup().
      3. If the host has power control, make sure power was left on.
    """
    paused_marker = client_constants.CLEANUP_LOGS_PAUSED_FILE
    self.run('rm -f %s' % paused_marker)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001381
1382
def reboot(self, **dargs):
    """
    This function reboots the site host. The more generic
    RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    Defaults filled into `dargs` before delegating to the parent:
      - 'reboot_cmd': unless supplied, a detached shell command that
        starts `reboot`, sleeps 'reboot_timeout' seconds (default 10)
        and then starts `reboot -f`.
      - 'fastsync': True unless supplied.
      - 'board': always set, to the value of get_board() with
        everything up to and including the first ':' removed.
    """
    if 'reboot_cmd' not in dargs:
        force_delay = dargs.get('reboot_timeout', 10)
        dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
                               ' </dev/null >/dev/null 2>&1 &)' %
                               force_delay)

    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times:
    # Get the board name i.e. 'daisy_spring', strip the label prefix
    # and add it to dargs.
    board_fullname = self.get_board()
    dargs['board'] = board_fullname.split(':', 1)[-1]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001407
1408
def suspend(self, **dargs):
    """
    This function suspends the site host.

    'suspend_time' (default 60) is read from `dargs` and is also
    stored as dargs['timeout'].  Unless the caller supplies
    'suspend_cmd', a detached shell command is built that clears the
    rtc0 wake alarm, re-arms it `suspend_time` seconds ahead and then
    runs powerd_dbus_suspend.  The parent suspend() does the rest.
    """
    wake_after = dargs.get('suspend_time', 60)
    dargs['timeout'] = wake_after
    if 'suspend_cmd' not in dargs:
        steps = ('echo 0 > /sys/class/rtc/rtc0/wakealarm',
                 'echo +%d > /sys/class/rtc/rtc0/wakealarm' % wake_after,
                 'powerd_dbus_suspend --delay=0 &')
        cmd = ' && '.join(steps)
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' % cmd)
    super(CrosHost, self).suspend(**dargs)
1422
1423
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    # grep exits with status 1 when nothing matches, and run() raises
    # on a nonzero exit status unless told to ignore it.  Without
    # ignore_status a stopped service would surface as an exception
    # instead of the documented False.
    result = self.run('status %s | grep start/running' % service_name,
                      ignore_status=True)
    return result.stdout.strip() != ''
1433
1434
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Verification is refused while crash-logs still need to be
    collected from the host.  Otherwise tests for the following
    conditions, in order:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted, when
        that path exists on the host.
     4. The 'system-services' upstart job is running.
     5. update_engine answers a simple status request (skipped on
        factory images).
     6. python on the host can import cPickle.

    @raises error.AutoservCrashLogCollectRequired if crash-logs must
            be collected first.
    @raises error.AutoservError if system services are not running.
    """
    # Check if a job was crashed on this host.
    # If yes, avoid verification until crash-logs are collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()

    config = global_config.global_config
    stateful_gb = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float, default=20.0)
    self.check_diskspace('/mnt/stateful_partition', stateful_gb)

    # Not all targets build with encrypted stateful support.
    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    if self.path_exists(encrypted_stateful_path):
        encrypted_gb = config.get_config_value(
                'SERVER', 'gb_encrypted_diskspace_required', type=float,
                default=0.1)
        self.check_diskspace(encrypted_stateful_path, encrypted_gb)

    if not self.upstart_status('system-services'):
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')

    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001479
1480
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    The command also always carries StrictHostKeyChecking=no and
    BatchMode=yes, plus this host's verbosity flag and ssh options.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line as a string, without a target host.
    """
    fixed_options = (' -o StrictHostKeyChecking=no'
                     ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                     ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                     ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                     ' -o Protocol=2')
    template = '/usr/bin/ssh -a -x %s %s %s' + fixed_options + ' -l %s -p %d'
    substitutions = (self._ssh_verbosity_flag, self._ssh_options,
                     opts, user, port)
    return template % substitutions
Fang Deng96667ca2013-08-01 17:46:18 -07001521
1522
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    The tunnel is an ssh client started through the shell with local
    port forwarding (-L) and no remote command (-N).

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forward_spec = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forward_spec),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel.pid)
    return tunnel
1545
1546
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, or None.  It
                       is recorded next to the tunnel so that
                       rpc_disconnect() can wait for that process to
                       go away.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop whatever is already registered for this remote port.
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    self._rpc_proxy_map[port] = (command_name, tunnel, remote_pid)
    return self._RPC_PROXY_URL % forwarded_port
1582
1583
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready.  If the readiness call does not succeed, the
    connection for `port` is torn down again via rpc_disconnect and
    the failure propagates to the caller.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'
    @param logfile Logfile to send output when running
        'command' argument.

    @return The XMLRPC client proxy.
    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.  `echo $!` reports the pid of the background job.
    launch_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    remote_pid = self.run(launch_cmd).stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    proxy_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(proxy_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        transient_errors = (socket.error,
                            xmlrpclib.ProtocolError,
                            httplib.BadStatusLine)
        poll_delay = min(max(timeout_seconds / 20.0, 0.1), 1)

        @retry.retry(transient_errors,
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=poll_delay)
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        # The flag (rather than an except clause) lets the cleanup run
        # for every kind of failure without touching the exception.
        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001668
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001669
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    The message is written by running `logger` on the host.
    NOTE(review): `tag` and `message` are placed inside double quotes
    without any escaping, so shell metacharacters in them are
    interpreted by the shell on the host.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    logger_cmd = 'logger -t "%s" "%s"' % (tag, message)
    self.run(logger_cmd)
1678
1679
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a remote
    jsonrpc server, as the only clients of this code are factory tests,
    for which the goofy system manager is built in to the image and starts
    when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None when jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name is registered for the tunnel, so rpc_disconnect
    # will not try to kill a remote process for this port.
    make_proxy = jsonrpclib.jsonrpc.ServerProxy
    proxy = make_proxy(self._setup_rpc(port, None))

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1710
1711
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    NOTE(review): when the remote server does not exit in time the
    TestError is raised before the tunnel is terminated and before
    the port is removed from the proxy map.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @raises error.TestError if the remote server is still running
            after self._RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    if port not in self._rpc_proxy_map:
        return
    server_name, tunnel, server_pid = self._rpc_proxy_map[port]
    if server_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % server_name, ignore_status=True)
        if server_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         server_name)
            started = time.time()
            # Poll pgrep until the recorded pid is gone; the else
            # branch runs only when the loop times out without a break.
            while (time.time() - started <
                   self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                still_running = self.run(
                        "pgrep -f '%s'" % server_name,
                        ignore_status=True).stdout.split()
                if server_pid not in still_running:
                    logging.info('Shut down RPC server.')
                    break
                time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
            else:
                raise error.TestError('Failed to shutdown RPC server %s' %
                                      server_name)

    if tunnel.poll() is None:
        tunnel.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel.pid, tunnel.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001766
1767
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() deletes entries from _rpc_proxy_map, so iterate
    # over an explicit snapshot of the ports.  Iterating the dict (or
    # a live keys() view) while it shrinks raises RuntimeError.
    for port in list(self._rpc_proxy_map.keys()):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001772
1773
def poor_mans_rpc(self, fun):
    """
    Builds a shell command that calls a function from client utils.

    The command changes into the bin directory of the autotest
    installation on the host and runs a python one-liner that prints
    utils.<fun>.  The command is only built here, not executed; the
    caller is expected to run it on the host.

    @param fun function call in client utils namespace, as source
               text (it is pasted after `utils.`).
    @return the shell command string.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    return ('cd %s/bin; '
            'python -c "import common; import utils;'
            'print utils.%s"' % (autodir, fun))
1785
1786
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    # utils.ping() returning 0 is taken to mean the host answered.
    host_answered = utils.ping(self.hostname, tries=1, deadline=1) == 0
    # XOR is true exactly when the two values differ.
    return not (status ^ host_answered)
1797
def _ping_wait_for_status(self, status, timeout):
    """Wait for the host to have a given status (UP or DOWN).

    Status is checked by polling.  Polling will not last longer
    than the number of seconds in `timeout`.  The polling
    interval will be long enough that only approximately
    _PING_WAIT_COUNT polling cycles will be executed, subject
    to a maximum interval of about one minute.

    @param status Waiting will stop immediately if `ping` of the
                  host returns this status.
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    # _ping_check_status() takes about 1 second, hence the
    # "- 1" in the formula below.
    cycle_secs = int(timeout / self._PING_WAIT_COUNT)
    pause_secs = min(cycle_secs, 60) - 1
    deadline = time.time() + timeout
    while time.time() <= deadline:
        if self._ping_check_status(status):
            return True
        if pause_secs > 0:
            time.sleep(pause_secs)

    # The last thing we did was sleep(pause_secs), so it may
    # have been too long since the last `ping`.  Check one more
    # time, just to be sure.
    return self._ping_check_status(status)
1828
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001844
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001858
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host sleep.
                         Defaults to self.SLEEP_TIMEOUT when None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    if sleep_timeout is None:
        limit = self.SLEEP_TIMEOUT
    else:
        limit = sleep_timeout

    # "Asleep" is detected as the host no longer answering ping.
    if self.ping_wait_down(timeout=limit):
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' % limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001889
1890
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode.  A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host up.
                          Defaults to self.RESUME_TIMEOUT when None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if resume_timeout is None:
        limit = self.RESUME_TIMEOUT
    else:
        limit = resume_timeout

    if not self.wait_up(timeout=limit):
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
            limit)

    # A changed boot id means the host went through a reboot instead
    # of waking from sleep.
    new_boot_id = self.get_boot_id()
    if new_boot_id != old_boot_id:
        raise error.TestFail(
            'client rebooted, but sleep was expected'
            ' (old boot %s, new boot %s)'
            % (old_boot_id, new_boot_id))
1925
1926
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The test for "has shut down" can't distinguish a system that
    is merely asleep; to confirm that the unit was down, it is
    necessary to force boot, and then call test_wait_for_boot().

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    # "Shut down" is detected as the host no longer answering ping
    # within self.SHUTDOWN_TIMEOUT seconds.
    if self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' %
        self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001952
1953
def test_wait_for_boot(self, old_boot_id=None):
    """Block until the client has booted from cold power.

    Pass the value that `get_boot_id()` returned before shutting down
    as `old_boot_id`.  If the boot id is unchanged once the host is
    back, no reboot happened and a `TestFail` exception is raised.
    The boot id comparison is skipped when `old_boot_id` is not
    specified (or is otherwise falsy).

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_up:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    if not old_boot_id:
        # Nothing to compare against; being reachable is enough.
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001983
1984
@staticmethod
def check_for_rpm_support(hostname):
    """Tell whether a hostname matches the RPM-powered DUT naming format.

    The hostname is matched against `CrosHost._RPM_HOSTNAME_REGEX`.

    @param hostname: hostname to check for rpm support.

    @return None if this hostname does not follow the defined naming
            format for RPM powered DUTs in the lab.  If it does follow
            the format, a regular expression MatchObject is returned
            instead.
    """
    rpm_hostname_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001996
1997
def has_power(self):
    """Report whether this host is powered by an RPM.

    The answer is whatever `check_for_rpm_support()` returns for
    `self.hostname`.

    @return A truthy value (the regular expression match object) if this
            host is in the CROS lab and follows the defined naming
            format; None otherwise.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07002005
2006
def _set_power(self, state, power_method):
    """Set the power state of the host via RPM, Servo or manual action.

    @param state Specifies which power state to set to DUT; must be
                 'ON' or 'OFF' (compared case-insensitively).
    @param power_method Specifies which method of power control to
                        use. Any value other than the servo or manual
                        constants selects RPM. Valid values
                        are the strings "RPM", "manual", "servoj10".

    @exception TestError `state` is not one of the accepted states.
    @exception TestFail RPM control was requested but has_power()
                        reports no RPM for this host.
    """
    valid_states = ['ON', 'OFF']
    requested_state = state.upper()
    if requested_state not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; just give the operator time to act.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Every other power method is handled through the RPM.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    # Record on the host that its outlet state was changed.
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, requested_state, timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002037
2038
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Switch off power to this host via RPM, Servo or manual action.

    Thin wrapper that asks _set_power() for the 'OFF' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='OFF', power_method=power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002048
2049
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Switch on power to this host via RPM, Servo or manual action.

    Thin wrapper that asks _set_power() for the 'ON' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='ON', power_method=power_method)
2059
2060
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the servo and manual methods the cycle is done here as
    power_off(), a pause of `self._POWER_CYCLE_TIMEOUT` seconds, then
    power_on().  For any other method a single 'CYCLE' request is sent
    through `rpm_client`.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    locally_cycled_methods = (self.POWER_CONTROL_SERVO,
                              self.POWER_CONTROL_MANUAL)
    if power_method not in locally_cycled_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002076
2077
def get_platform(self):
    """Work out the platform label for this host from its firmware id.

    @returns a string representing this host's platform.
    """
    host_crossystem = utils.Crossystem(self)
    host_crossystem.init()
    # The fwid generally follows the format {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z, so the part before
    # the first dot serves as the platform id.
    fwid = host_crossystem.fwid()
    platform_id = fwid.split('.')[0].lower()
    # Newer platforms carry a 'Google_' marker that older ones lack.
    return platform_id.replace('google_', '')
2091
2092
def get_architecture(self):
    """Work out the architecture label for this host.

    The value is read through a `utils.Crossystem` object for this host.

    @returns a string representing this host's architecture.
    """
    host_crossystem = utils.Crossystem(self)
    host_crossystem.init()
    architecture = host_crossystem.arch()
    return architecture
2101
2102
def get_chrome_version(self):
    """Fetch the Chrome version number and milestone as strings.

    Runs `client_constants.CHROME_VERSION_COMMAND` on the host and hands
    its stdout to `utils.parse_chrome_version()`.

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
        current Chrome version number as a string (in the form "W.X.Y.Z")
        and "milestone" is the first component of the version number
        (the "W" from "W.X.Y.Z").  If the version number cannot be parsed
        in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
        of "chrome --version" and the milestone will be the empty string.

    """
    version_result = self.run(client_constants.CHROME_VERSION_COMMAND)
    return utils.parse_chrome_version(version_result.stdout)
2118
@label_decorator()
def get_board(self):
    """Work out the board label for this host from /etc/lsb-release.

    @returns a string representing this host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    board = lsb_release['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    name_parts = board.split('-')
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}, so only the
    # leading component is kept.  A name containing 'x86' keeps its first
    # two components so the 'x86-' prefix survives.
    if 'x86' in board:
        return label_format % '-'.join(name_parts[0:2])
    return label_format % name_parts[0]
Simran Basic6f1f7a2012-10-16 10:47:46 -07002135
2136
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Work out the board name together with frequency and memory.

    The value comes from running the command built by poor_mans_rpc()
    on the host.

    @returns a more detailed string representing the board. Examples are
    butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    rpc_command = self.poor_mans_rpc(
            'get_board_with_frequency_and_memory()')
    raw_board = self.run(rpc_command).stdout
    return 'board_freq_mem:%s' % str.strip(raw_board)
2148
2149
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Detect whether this host exposes a light sensor.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    file_alternatives = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            self._LIGHTSENSOR_SEARCH_DIR, file_alternatives)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
2169
2170
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Detect whether this host has a bluetooth adapter (hci0).

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did
        # not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
2186
2187
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Work out the GPU family of this host.

    The value comes from running the command built by poor_mans_rpc()
    on the host.

    @returns a string representing the gpu family. Examples are mali, tegra,
    pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    rpc_command = self.poor_mans_rpc('get_gpu_family()')
    raw_family = self.run(rpc_command).stdout
    return 'gpu_family:%s' % str.strip(raw_family)
2198
2199
@label_decorator('graphics')
def get_graphics(self):
    """
    Work out the graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2215
2216
@label_decorator('ec')
def get_ec(self):
    """
    Work out the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) don't return any strings, since no tests depend on
    those.
    """
    cmd = 'mosys ec info'
    # The output should look like these, so that the last field should
    # match our EC version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs will look like these:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # And some systems don't have ECs at all (Lumpy, for example).
    cros_ec_pattern = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    version_match = re.search(cros_ec_pattern, ecinfo.stdout)
    if not version_match:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", version_match.group(1))
    return 'ec:cros'
2254
2255
@label_decorator('accels')
def get_accels(self):
    """
    Work out the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or none, if no accelerometers.
    """
    # ectool has to be installed before anything else can be probed.
    ectool_lookup = self.run('which ectool', ignore_status=True)
    if ectool_lookup.exit_status:
        logging.info("No ectool cmd found, assuming no EC accelerometers")
        return None

    # The EC has to support the motionsense command.
    motionsense = self.run('ectool motionsense', ignore_status=True)
    if motionsense.exit_status:
        logging.info("EC does not support motionsense command "
                     "assuming no EC accelerometers")
        return None

    # The EC motion sensors have to be active; the first output line
    # is "0" when they are not.
    active_output = self.run('ectool motionsense active').stdout
    first_line = active_output.split('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2286
2287
@label_decorator('chameleon')
def has_chameleon(self):
    """Report whether a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
             has a Chameleon or None if it has not.
    """
    if not self._chameleon_host:
        return None
    specific_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', specific_label]
2300
2301
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Work out what type of power supply the host has.

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when mosys reports the type as 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # The psu command for mosys is not included for all platforms.
        # The assumption is that the device will have a battery if the
        # command is not found.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2326
2327
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = '/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or
        # something else. We can assume that MMC is always an internal
        # device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Named dev_type so the builtin `type` is not shadowed.
        dev_type = self.run(command=type_cmd, ignore_status=True)
        if dev_type.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = dev_type.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = '/dev/sd[a-z]+'
    # Match against the stripped device path, as the MMC check does, so
    # stray leading whitespace in the output cannot defeat the anchored
    # match.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2400
2401
@label_decorator('servo')
def get_servo(self):
    """Report whether the host has a servo attached.

    A host with a working servo attached should carry the servo label;
    the decision here is based on whether `self._servo_host` is set.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2412
2413
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect; an
             empty list if either command run on the host fails.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
        # Raw string: '\w' in a plain literal is an invalid escape
        # sequence that newer interpreters warn about.
        return re.findall(r'^Detected label: (\w+)$', result, re.M)
    except error.AutoservRunError:
        # The tool is not installed.
        return []
2435
2436
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """Determine if a board under test is supported for video glitch
    detection tests.

    The board name (this host's board label without its prefix) is looked
    up among the section names of client/cros/video/device_spec.conf.

    @return: 'video_glitch_detection' if board is supported, None otherwise
             (including when the conf file cannot be parsed).
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
            common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
    except ConfigParser.Error:
        # Something went wrong while parsing the conf file.  The base
        # exception class is spelled `Error`; the module has no
        # lowercase `error` attribute.
        return None

    supported_boards = parser.sections()
    return 'video_glitch_detection' if dut in supported_boards else None
2459
@label_decorator('touch_labels')
def get_touch(self):
    """
    Detect whether the board under test has a touchpad or touchscreen.

    Each device type is probed with inputcontrol; a type counts as
    present when the command prints anything to stdout.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
             depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [device_type
            for device_type in ('touchpad', 'touchscreen')
            if self.run(input_cmd % device_type).stdout]
2474
2475
mussa584b4462014-06-20 15:13:28 -07002476
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions
    (the entries of `self._LABEL_FUNCTIONS`).

    A label function may return a single string, a list of strings, or a
    falsy value for "no label".  A label function that raises is logged
    and skipped so one failing probe does not lose the other labels.

    @return: A flat list of label strings.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            continue
        if not label:
            continue
        # isinstance rather than an exact type() comparison, so that
        # subclasses of str/list are not silently dropped.
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, list):
            labels.extend(label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002498
2499
def is_boot_from_usb(self):
    """Check whether the DUT's root device is a removable block device.

    The root device is taken from `rootdev -s -d`, and its `removable`
    flag is read from /sys/block.

    @return: True if DUT is boot from usb.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    block_name = os.path.basename(root_device)
    removable_cmd = 'cat /sys/block/%s/removable' % block_name
    removable_flag = self.run(removable_cmd).stdout.strip()
    return int(removable_flag) == 1