blob: 954ff642a442f8bccc69e607591d104248a79b7b [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Dan Shi7dca56e2014-11-11 17:07:56 -080024from autotest_lib.client.common_lib.cros.graphite import es_utils
Michael Liangda8c60a2014-06-03 13:24:51 -070025from autotest_lib.client.common_lib.cros.graphite import stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
MK Ryu35d661e2014-09-25 17:44:10 -070028from autotest_lib.server import autoserv_parser
29from autotest_lib.server import autotest
30from autotest_lib.server import constants
31from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070032from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070033from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050034from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070035from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070036from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070037from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080038from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070039from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080040from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070041
42
beeps32a63082013-08-22 14:02:29 -070043try:
44 import jsonrpclib
45except ImportError:
46 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070047
Fang Dengd1c2b732013-08-20 12:59:46 -070048
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
52
53
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label-detecting functions.

    @param label_function_list: List that every decorated function is
                                appended to.
    @param label_list: List that `label` is appended to when both
                       `label` and `label_list` are supplied.
                       (Default: None)
    @param label: Label string that the decorated function can detect.
                  (Default: None)
    @returns A decorator that records the function and hands it back
             unchanged.
    """
    def _register(func):
        """
        @param func: The function to be added as a detector.
        @returns `func` itself, so the decorated name is left intact.
        """
        label_function_list.append(func)
        # Only advertise the label when there is both a label and a
        # list to collect it in.
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func

    return _register
72
73
Fang Deng0ca40e22013-08-27 17:47:44 -070074class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070075 """Chromium OS specific subclass of Host."""
76
77 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050078 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070079
Richard Barnette03a0c132012-11-05 12:40:35 -080080 # Timeout values (in seconds) associated with various Chrome OS
81 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070082 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -080083 # In general, a good rule of thumb is that the timeout can be up
84 # to twice the typical measured value on the slowest platform.
85 # The times here have not necessarily been empirically tested to
86 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070087 #
88 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -080089 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
90 # time to restart the network.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080091 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070092 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -080093 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -080094 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070095 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -080096 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -080097 # network.
beepsf079cfb2013-09-18 17:49:51 -070098 # INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080099 # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
100 # includes powerwash.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700101
102 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800103 RESUME_TIMEOUT = 10
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +0800104 SHUTDOWN_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700105 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700106 USB_BOOT_TIMEOUT = 150
J. Richard Barnette7817b052014-08-28 09:47:29 -0700107 INSTALL_TIMEOUT = 480
Dan Shi2c88eed2013-11-12 10:18:38 -0800108 POWERWASH_BOOT_TIMEOUT = 60
Chris Sosab76e0ee2013-05-22 16:55:41 -0700109
J. Richard Barnette84890bd2014-02-21 11:05:47 -0800110 # REBOOT_TIMEOUT: How long to wait for a reboot.
111 #
Chris Sosab76e0ee2013-05-22 16:55:41 -0700112 # We have a long timeout to ensure we don't flakily fail due to other
113 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
Simran Basi1160e2c2013-10-04 16:00:24 -0700114 # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
115 # return from reboot' bug is solved.
116 REBOOT_TIMEOUT = 480
Chris Sosab76e0ee2013-05-22 16:55:41 -0700117
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800118 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
119 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
120 _USB_POWER_TIMEOUT = 5
121 _POWER_CYCLE_TIMEOUT = 10
122
beeps32a63082013-08-22 14:02:29 -0700123 _RPC_PROXY_URL = 'http://localhost:%d'
Christopher Wileydd181852013-10-10 19:56:58 -0700124 _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
Peter Qiu4410db72014-06-05 10:32:41 -0700125 # Set shutdown timeout to account for the time for restarting the UI.
126 _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800127
Richard Barnette82c35912012-11-20 10:09:10 -0800128 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
129 'rpm_recovery_boards', type=str).split(',')
130
131 _MAX_POWER_CYCLE_ATTEMPTS = 6
132 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
Fang Dengdeba14f2014-11-14 11:54:09 -0800133 _RPM_HOSTNAME_REGEX = ('chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
134 '-host(\d+)')
Gwendal Grignoua66f1d12014-12-03 10:07:26 -0800135 _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
136 "in_illuminance_input",
137 "in_illuminance0_raw",
138 "in_illuminance_raw",
139 "illuminance0_input"]
Richard Barnette82c35912012-11-20 10:09:10 -0800140 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
141 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800142 _DETECTABLE_LABELS = []
143 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
144 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700145
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800146 # Constants used in ping_wait_up() and ping_wait_down().
147 #
148 # _PING_WAIT_COUNT is the approximate number of polling
149 # cycles to use when waiting for a host state change.
150 #
151 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
152 # for arguments to the internal _ping_wait_for_status()
153 # method.
154 _PING_WAIT_COUNT = 40
155 _PING_STATUS_DOWN = False
156 _PING_STATUS_UP = True
157
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800158 # Allowed values for the power_method argument.
159
160 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
161 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
162 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
163 POWER_CONTROL_RPM = 'RPM'
164 POWER_CONTROL_SERVO = 'servoj10'
165 POWER_CONTROL_MANUAL = 'manual'
166
167 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
168 POWER_CONTROL_SERVO,
169 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800170
Simran Basi5e6339a2013-03-21 11:34:32 -0700171 _RPM_OUTLET_CHANGED = 'outlet_changed'
172
Dan Shi9cb0eec2014-06-03 09:04:50 -0700173 # URL pattern to download firmware image.
174 _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
175 'CROS', 'firmware_url_pattern', type=str)
beeps687243d2013-07-18 15:29:27 -0700176
MK Ryu35d661e2014-09-25 17:44:10 -0700177 # File that has a list of directories to be collected
178 _LOGS_TO_COLLECT_FILE = os.path.join(
179 common.client_dir, 'common_lib', 'logs_to_collect')
180
181 # Prefix of logging message w.r.t. crash collection
182 _CRASHLOGS_PREFIX = 'collect_crashlogs'
183
184 # Time duration waiting for host up/down check
185 _CHECK_HOST_UP_TIMEOUT_SECS = 15
186
187 # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
188 # might not be completely done deep through the hardware when the machine
189 # is powered down right after the command returns.
190 # We should wait for a few seconds to make them done. Finger crossed.
191 _SAFE_WAIT_SECS = 10
192
193
J. Richard Barnette964fba02012-10-24 17:34:29 -0700194 @staticmethod
beeps46dadc92013-11-07 14:07:10 -0800195 def check_host(host, timeout=10):
196 """
197 Check if the given host is a chrome-os host.
198
199 @param host: An ssh host representing a device.
200 @param timeout: The timeout for the run command.
201
202 @return: True if the host device is chromeos.
203
beeps46dadc92013-11-07 14:07:10 -0800204 """
205 try:
Christopher Wiley1ea80942014-02-26 16:45:08 -0800206 result = host.run('grep -q CHROMEOS /etc/lsb-release && '
Simran Basie5f7ae42014-06-26 15:44:06 -0700207 '! which adb >/dev/null 2>&1 && '
208 '! grep -q moblab /etc/lsb-release',
Christopher Wileyfc3eac02013-11-21 16:24:57 -0800209 ignore_status=True, timeout=timeout)
beeps46dadc92013-11-07 14:07:10 -0800210 except (error.AutoservRunError, error.AutoservSSHTimeout):
211 return False
212 return result.exit_status == 0
213
214
215 @staticmethod
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800216 def _extract_arguments(args_dict, key_subset):
217 """Extract options from `args_dict` and return a subset result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800218
219 Take the provided dictionary of argument options and return
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800220 a subset that represent standard arguments needed to construct
221 a test-assistant object (chameleon or servo) for a host. The
222 intent is to provide standard argument processing from
223 run_remote_tests for tests that require a test-assistant board
224 to operate.
225
226 @param args_dict Dictionary from which to extract the arguments.
227 @param key_subset Tuple of keys to extract from the args_dict, e.g.
228 ('servo_host', 'servo_port').
229 """
230 result = {}
231 for arg in key_subset:
232 if arg in args_dict:
233 result[arg] = args_dict[arg]
234 return result
235
236
237 @staticmethod
238 def get_chameleon_arguments(args_dict):
239 """Extract chameleon options from `args_dict` and return the result.
240
241 Recommended usage:
242 ~~~~~~~~
243 args_dict = utils.args_to_dict(args)
244 chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
245 host = hosts.create_host(machine, chameleon_args=chameleon_args)
246 ~~~~~~~~
247
248 @param args_dict Dictionary from which to extract the chameleon
249 arguments.
250 """
251 return CrosHost._extract_arguments(
252 args_dict, ('chameleon_host', 'chameleon_port'))
253
254
255 @staticmethod
256 def get_servo_arguments(args_dict):
257 """Extract servo options from `args_dict` and return the result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800258
259 Recommended usage:
260 ~~~~~~~~
261 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700262 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800263 host = hosts.create_host(machine, servo_args=servo_args)
264 ~~~~~~~~
265
266 @param args_dict Dictionary from which to extract the servo
267 arguments.
268 """
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800269 return CrosHost._extract_arguments(
270 args_dict, ('servo_host', 'servo_port'))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700271
J. Richard Barnette964fba02012-10-24 17:34:29 -0700272
def _initialize(self, hostname, chameleon_args=None, servo_args=None,
                ssh_verbosity_flag='', ssh_options='',
                *args, **dargs):
    """Initialize superclasses, |self.chameleon|, and |self.servo|.

    The servo and chameleon hosts (aka test-assistant hosts) are
    obtained from `servo_host.create_servo_host()` and
    `chameleon_host.create_chameleon_host()`, which are given this
    DUT's hostname plus `servo_args` / `chameleon_args`.  When a
    factory hands back no host, the matching |self.servo| or
    |self.chameleon| attribute is set to `None`.

    NOTE(review): the rules those factories use to decide whether a
    test-assistant host exists (e.g. lab systems vs. settings such as
    `servo_host`/`servo_port` passed from a control file) live outside
    this method -- confirm there.

    @param hostname: Name of the DUT; forwarded to the superclass.
    @param chameleon_args: Chameleon settings, or None.
    @param servo_args: Servo settings, or None.
    @param ssh_verbosity_flag: Stored as `_ssh_verbosity_flag`.
    @param ssh_options: Stored as `_ssh_options`.

    """
    super(CrosHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options

    # TODO(fdeng): We need to simplify the
    # process of servo and servo_host initialization.
    # crbug.com/298432
    self._servo_host = servo_host.create_servo_host(
            dut=self.hostname, servo_args=servo_args)
    # TODO(waihong): Do the simplification on Chameleon too.
    self._chameleon_host = chameleon_host.create_chameleon_host(
            dut=self.hostname, chameleon_args=chameleon_args)

    # Note the two checks differ on purpose-preserving grounds: servo
    # is tested against None, chameleon by truthiness.
    self.servo = (self._servo_host.get_servo()
                  if self._servo_host is not None else None)
    self.chameleon = (self._chameleon_host.create_chameleon_board()
                      if self._chameleon_host else None)
Fang Deng5d518f42013-08-02 14:04:32 -0700321
322
def get_repair_image_name(self):
    """Build the repair image name from global config values.

    The name is the CROS.stable_build_pattern setting filled in with
    this DUT's board (looked up in the AFE) and the
    CROS.stable_cros_version setting.

    @returns a str of $board-version/$BUILD.
    @raises AutoservError if the AFE reports no board for this DUT.

    """
    config = global_config.global_config
    stable_version = config.get_config_value(
            'CROS', 'stable_cros_version')
    build_pattern = config.get_config_value(
            'CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
338
339
Scott Zawalski62bacae2013-03-05 10:40:32 -0500340 def _host_in_AFE(self):
341 """Check if the host is an object the AFE knows.
342
343 @returns the host object.
344 """
345 return self._AFE.get_hosts(hostname=self.hostname)
346
347
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    The AFE is queried once; the same result is used both to decide
    whether the host is known and to read its attributes.

    @returns job_repo_url from AFE, or None if the host is unknown to
             the AFE or has no job_repo_url attribute.  No exception
             is raised for a missing attribute.

    """
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700361
362
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.
    """
    if self._host_in_AFE():
        this_host = [self.hostname]
        # Every label on this host whose name starts with the version
        # prefix is detached from the host.
        version_labels = self._AFE.get_labels(
                name__startswith=ds_constants.VERSION_PREFIX,
                host__hostname=self.hostname)
        for version_label in version_labels:
            version_label.remove_hosts(hosts=this_host)

        # Passing None for both arguments resets job_repo_url to None.
        self.update_job_repo_url(None, None)
377
378
def update_job_repo_url(self, devserver_url, image_name):
    """
    Set the job_repo_url host attribute and read it back to confirm.

    When either argument is empty the attribute is set to None.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If the value read back from the AFE differs
                           from the value just written.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    stored_url = self.lookup_job_repo_url()
    if stored_url != repo_url:
        raise error.AutoservError('Failed to update job_repo_url with %s, '
                                  'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500396
397
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise the
    version label for `image_name` is reused if it already exists (or
    created if not), attached to this host, and job_repo_url is
    pointed at the devserver resolved for the image.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=cros_label)
    version_label = (existing[0] if existing
                     else self._AFE.create_label(name=cros_label))

    version_label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
419
420
def verify_job_repo_url(self, tag=''):
    """
    Stage the autotest packages named by this host's job_repo_url.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0.  This method asks
    the devserver embedded in that url to stage the
    'autotest_packages' artifact for that build, then reports how long
    staging took through the 'verify_job_repo_url' stats gauge.

    If the host has no job_repo_url, a warning is logged and nothing
    is staged.  If the image name cannot be parsed into
    board/build_type/branch, the timing is simply not reported.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server
                job.  Not used by this method.

    NOTE(review): errors from the devserver calls are not handled
    here and propagate to the caller; the exact exception types
    (earlier documentation mentioned DevServerException and
    urllib2.URLError) should be confirmed against dev_server.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
        job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    staging_began = time.time()
    ds.stage_artifacts(image_name, ['autotest_packages'])
    stage_time = time.time() - staging_began

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        parsed_name = server_utils.ParseBuildName(image_name)
    except server_utils.ParseBuildNameException:
        return
    board, build_type, branch = parsed_name[:3]

    # Slice out the text between the first '//' and the last ':' of
    # the devserver url, i.e. the host part of 'scheme://host:port'.
    host_begin = devserver_url.find('/') + 2
    host_end = devserver_url.rfind(':')
    devserver = devserver_url[host_begin:host_end]

    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        # Dots separate components of the stats key, so they are
        # replaced inside the devserver name.
        'devserver': devserver.replace('.', '_'),
    }
    gauge_name = (
        '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key)
    stats.Gauge('verify_job_repo_url').send(gauge_name, stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700482
Scott Zawalskieadbf702013-03-14 09:23:06 -0400483
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    A marker file is dropped into each folder that the stateful update
    is expected to rebuild.  After the reboot, any marker that still
    exists means that folder was not rebuilt, and the stateful update
    is reported as failed so the caller can fall back to a full update.

    @param update_url: url of the image.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    # TODO(jrbarnette): Yes, I hate this re.match() test case.
    # It's better than the alternative: see crbug.com/360944.
    image_name = autoupdater.url_to_image_name(update_url)
    release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
    if not re.match(release_pattern, image_name):
        return False
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False

    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    # `test -f` exits 0 when the file exists.  The command must not be
    # followed by `; echo $?`: the exit status of such a list is that
    # of `echo`, which hides the result of the test.
    for marker_path in marker_paths:
        result = self.run('test -f %s' % marker_path,
                          ignore_status=True)
        if result.exit_status == 0:
            logging.warning('%s survived the stateful update; the '
                            'folder was not rebuilt.', marker_path)
            return False
    return True
534
535
J. Richard Barnette7275b612013-06-04 18:13:11 -0700536 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800537 """After the DUT is updated, confirm machine_install succeeded.
538
539 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700540 @param expected_kernel: kernel expected to be active after reboot,
541 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800542
543 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700544 # Touch the lab machine file to leave a marker that
545 # distinguishes this image from other test images.
546 # Afterwards, we must re-run the autoreboot script because
547 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800548 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800549 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700550 updater.verify_boot_expectations(
551 expected_kernel, rollback_message=
552 'Build %s failed to boot on %s; system rolled back to previous'
553 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700554 # Check that we've got the build we meant to install.
555 if not updater.check_version_to_confirm_install():
556 raise autoupdater.ChromiumOSError(
557 'Failed to update %s to build %s; found build '
558 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700559 updater.update_version,
560 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800561
562
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    Falls back to the repair image name when no image is given.  The
    download is triggered with synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name if image_name else self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)
    devserver = dev_server.ImageServer.resolve(build)
    devserver.trigger_download(build, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (devserver.url(), build)
576
577
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    Falls back to the repair image name when no image is given.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns the URL that the devserver reports for the test image of
        `image_name`.
    """
    build = image_name if image_name else self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)
    devserver = dev_server.ImageServer.resolve(build)
    devserver.stage_artifacts(build, ['test_image'])
    test_image_url = devserver.get_test_image_url(build)
    return test_image_url
591
592
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory build on a devserver and return its URL.

    @param image_name: a name like <board>/4262.204.0

    @return: An update URL, eg:
        http://<devserver>/static/canary-channel/
        <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
        None is returned (after logging an error) when `image_name`
        is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return None

    # The artifact to stage is named by CROS.factory_artifact; an
    # empty or missing setting leaves nothing to stage.
    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not factory_artifact:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    artifacts = [factory_artifact]
    devserver.stage_artifacts(image_name, artifacts, archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (devserver.url(), image_name)
625
626
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install a Chrome OS build on the DUT through update-engine.

    A stateful update is attempted first; it does not touch the kernel
    and tends to be much faster than a full reimage.  When the stateful
    update is not applicable or fails, a full update (including the
    kernel) is applied instead.

    On entry the host's cros_version label and its job_repo_url host
    attribute (used for package install) are removed; they are added
    back only after a successful update.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            When not given, the build requested through the parser's
            `image` option is used (staged on a devserver unless it is
            already an http:// url).  When that is not set either and
            repair is True, the repair image is staged.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode.  In repair mode
            the DUT is rebooted and update-engine restarted up front,
            and force_update is set to True.  Default: False
    @raises autoupdater.ChromiumOSError if no update url can be
            determined, or if after a full update the inactive kernel
            has lower priority than the active one.

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        requested_build = self._parser.options.image
        if not requested_build and not repair:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')
        if requested_build and requested_build.startswith('http://'):
            update_url = requested_build
            logging.debug('update url is retrieved from requested_build'
                          ': %s', update_url)
        else:
            if requested_build:
                # Any build that does not start with http:// is staged
                # on the devservers defined in global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
            else:
                # Repair mode without an explicit build.
                update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)

    if repair:
        # The system may be in a bad state, so always reboot the machine
        # before installing.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    updated = False
    # Drop the cros-version label and job_repo_url attribute: they are
    # re-added only once the new build is verified.
    self.clear_cros_version_labels_and_job_repo_url()

    # Fast path: stateful update.  Any failure here is logged and we
    # fall through to the full update below.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as stateful_error:
        logging.exception(stateful_error)
        logging.warning('Failed to stateful update DUT, force to update.')

    inactive_kernel = None
    if not updated:
        # Slow path: full update.  Reboot first in case the system is
        # in a bad state.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        for command in ('rm -rf /var/lib/whitelist',
                        'mkdir /var/lib/whitelist',
                        'chmod -w /var/lib/whitelist',
                        'stop update-engine; start update-engine'):
            self.run(command)

        if updater.run_update(force_update):
            updated = True
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # The freshly written (inactive) kernel must be the one
            # that boots next, i.e. must not have lower priority.
            inactive_priority = updater.get_kernel_priority(
                    inactive_kernel)
            active_priority = updater.get_kernel_priority(active_kernel)
            if inactive_priority < active_priority:
                raise autoupdater.ChromiumOSError(
                    'Update failed. The priority of the inactive kernel'
                    ' partition is less than that of the active kernel'
                    ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        installed_build = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(installed_build)

    logging.debug('Cleaning up old autotest directories.')
    try:
        stale_autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + stale_autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700747
748
def _clear_fw_version_labels(self):
    """Detach this host from every firmware version label it carries.

    Labels are looked up in the AFE by the firmware version prefix and
    this host's hostname.
    """
    stale_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    for stale_label in stale_labels:
        stale_label.remove_hosts(hosts=[self.hostname])
756
757
def _add_fw_version_label(self, build):
    """Attach the firmware version label for `build` to this host.

    The label is created first if it does not exist yet.

    @param build: Build of firmware.

    """
    label_name = provision.fw_version_to_label(build)
    provision.ensure_label_exists(label_name)
    # The first label whose name starts with label_name is used.
    matching_labels = self._AFE.get_labels(name__startswith=label_name)
    matching_labels[0].add_hosts([self.hostname])
768
769
def firmware_install(self, build=None):
    """Install firmware to the DUT through its servo.

    The firmware artifact of `build` is staged on a devserver, downloaded
    into a temporary directory and unpacked; the EC image and then the
    AP (BIOS) image are programmed through servo, after which the DUT is
    reset and given `servo.BOOT_DELAY` seconds to boot.

    Once a host enters firmware_install its fw_version label will be
    removed. After the firmware is updated successfully, a new fw_version
    label for `build` will be added to the host.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.
                  When not set, repair mode is assumed and the build
                  from `self.get_repair_image_name()` is used.

    @raises error.TestError if the host has no servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    # The AP image file name inside the tarball depends on the platform.
    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else: # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # The dts/ files are optional, hence ignore_status.
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # _add_fw_version_label() requires the build; calling it without
        # an argument raised TypeError after the firmware was flashed.
        self._add_fw_version_label(build)
    finally:
        # Always remove the downloaded firmware, even on failure.
        tmpd.clean()
836
837
def show_update_engine_log(self):
    """Run `cat` on the update engine log on the host."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
Dan Shi10e992b2013-08-30 11:02:59 -0700842
843
def _get_board_from_afe(self):
    """Look up this host's board in the AFE.

    Delegates to `server_utils.get_board_from_afe()`, which is expected
    to find a host label of the form "board:<board>" and return the
    "<board>" part, or `None` when there is not a single, unique
    matching label.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800854
855
def get_build(self):
    """Look up the current build of this host in the AFE.

    Delegates to `server_utils.get_build_from_afe()`, which inspects the
    host's labels in the AFE.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800865
866
def _install_repair(self):
    """Try to repair this host by reinstalling it with update-engine.

    If the host is up, `machine_install()` is run in repair mode, which
    installs the repair build when no other build was requested.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the failure, then let the caller decide what to do.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500887
888
def _install_repair_with_powerwash(self):
    """Powerwash the DUT, then repair it using update-engine.

    update-engine may fail due to a bad image. In such case, powerwash
    may help to cleanup the DUT for update-engine to work again.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises AutoservRepairFailure if the DUT does not come back up after
            the powerwash reboot.
    @raises ChromiumOSError if the install failed for some reason.

    """
    if not self.is_up():
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to powerwash the DUT.')
    # Request the powerwash by writing the reset marker, then reboot.
    request_powerwash = ('echo "fast safe" > '
                         '/mnt/stateful_partition/factory_install_reset')
    self.run(request_powerwash)
    self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)

    came_back = self.is_up()
    if not came_back:
        logging.error('Powerwash failed. DUT did not come back after '
                      'reboot.')
        raise error.AutoservRepairFailure(
                'DUT failed to boot from powerwash after %d seconds' %
                self.POWERWASH_BOOT_TIMEOUT)

    logging.info('Powerwash succeeded.')
    self._install_repair()
915
916
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """
    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    Both the USB boot and the install phases are timed with a stats
    timer whose key includes the timeout in use.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.

    """
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    usb_boot_timer = stats.Timer(
            'servo_install.usb_boot_timeout_%s' % usb_boot_timeout)
    usb_boot_timer.start()
    self.servo.install_recovery_image(image_url)
    if not self.wait_up(timeout=usb_boot_timeout):
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    usb_boot_timer.stop()

    install_timer = stats.Timer(
            'servo_install.install_timeout_%s' % install_timeout)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    install_command = ('chromeos-install --yes --lab_preserve_logs=%s' %
                       self._LOGS_TO_COLLECT_FILE)
    self.run(install_command, timeout=install_timeout)
    self.run('halt')
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.get_power_state_controller().power_off()
    self.servo.switch_usbkey('off')
    # N.B. The Servo API requires that we use power_on() here
    # for two reasons:
    #  1) After turning on a DUT in recovery mode, you must turn
    #     it off and then on with power_on() once more to
    #     disable recovery mode (this is a Parrot specific
    #     requirement).
    #  2) After power_off(), the only way to turn on is with
    #     power_on() (this is a Storm specific requirement).
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
979
980
def _servo_repair_reinstall(self):
    """Repair the DUT by reinstalling a test image through servo.

    The repair test image is staged with `stage_image_for_servo()` and
    then installed with `servo_install()`, i.e. by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_image_url = self.stage_image_for_servo()
    self.servo_install(staged_image_url)
1003
1004
def _servo_repair_power(self):
    """Try to repair the DUT by power cycling it through servo.

    The DUT is powered off and back on via the servo's power state
    controller, then given BOOT_TIMEOUT to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()

    booted = self.wait_up(self.BOOT_TIMEOUT)
    if not booted:
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001026
1027
def _powercycle_to_repair(self):
    """Use the RPM infrastructure to bring the host back up.

    The host is power cycled and given BOOT_TIMEOUT to come up; this is
    repeated until the host is up or `_MAX_POWER_CYCLE_ATTEMPTS` cycles
    have failed.  Repeated power cycling relies on the auto fallback to
    the last good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the repair fails for any reason.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
1058
1059
def _reboot_repair(self):
    """Reboot this host over SSH, provided it is reachable.

    @raises AutoservRepairMethodNA if the DUT is not up within
            `_CHECK_HOST_UP_TIMEOUT_SECS`.
    """
    reachable = self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS)
    if not reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
    logging.info('Attempting repair via SSH reboot.')
    self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1066
1067
def check_device(self):
    """Ping the device over ssh, then clean it up and verify it.

    The three steps run in order; the first one that raises aborts the
    check.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    # Reachability first: cleanup and verify are pointless otherwise.
    self.ssh_ping()
    self.cleanup()
    self.verify()
1081
1082
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If the host has a servo host but no servo object yet, the servo
    host is repaired first and the servo is (re)fetched from it.

    The repair procedures below are then tried in order.  After each
    procedure crash logs are collected and `self.check_device()` is
    run; the first procedure for which both the repair and the
    verification succeed ends the repair.

    Escalation order of repair procedures from less intrusive to
    more intrusive repairs:
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try the auto-update re-install
         again.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    Every attempt is counted in stats as SUCCEEDED, RepairNA or FAILED,
    both overall and (when the board is known) per board.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.
    @raises ServoHostRepairTotalFailure if the repair process fails to
            fix the servo host if one is attached to the DUT.
    @raises AutoservSshPermissionDeniedError if it is unable
            to ssh to the servo host due to permission error.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    def _bump(counter_name):
        """Increment the stats counter named counter_name."""
        stats.Counter(counter_name).increment()

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as servo_error:
            logging.error('Could not create a healthy servo: %s',
                          servo_error)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    escalation = [self._reboot_repair,
                  self._servo_repair_power,
                  self._powercycle_to_repair,
                  self._install_repair,
                  self._install_repair_with_powerwash,
                  self._servo_repair_reinstall]
    failure_messages = []
    board = self._get_board_from_afe()
    for attempt in escalation:
        method_name = attempt.__name__
        try:
            attempt()
            self.try_collect_crashlogs()
            self.check_device()
            _bump('%s.SUCCEEDED' % method_name)
            if board:
                _bump('%s.%s.SUCCEEDED' % (method_name, board))
            return
        except error.AutoservRepairMethodNA as not_applicable:
            _bump('%s.RepairNA' % method_name)
            if board:
                _bump('%s.%s.RepairNA' % (method_name, board))
            logging.warning('Repair function NA: %s', not_applicable)
            failure_messages.append(str(not_applicable))
        except Exception as repair_error:
            _bump('%s.FAILED' % method_name)
            if board:
                _bump('%s.%s.FAILED' % (method_name, board))
            logging.warning('Failed to repair device: %s', repair_error)
            failure_messages.append(str(repair_error))

    # Every procedure was either not applicable or failed.
    _bump('Full_Repair_Failed')
    if board:
        _bump('Full_Repair_Failed.%s' % board)
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failure_messages))
Richard Barnette82c35912012-11-20 10:09:10 -08001182
1183
def try_collect_crashlogs(self, check_host_up=True):
    """
    Collect crash logs from the host if it is flagged as needing it.

    Nothing is done when `_need_crash_logs()` reports no crashed job, or
    when check_host_up is set and the host is not up.  Any exception is
    logged and swallowed.

    @param check_host_up: Flag for checking host is up. Default is True.
    """
    try:
        crash_job = self._need_crash_logs()
        if not crash_job:
            return
        logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
                      crash_job)
        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return
        self._collect_crashlogs(crash_job)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', self._CRASHLOGS_PREFIX,
                      crash_job)
    except Exception as collect_error:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, collect_error)
1207
1208
def _need_crash_logs(self):
    """Read this host's need_crash_logs attribute from the AFE.

    At most one such attribute is expected per host.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    matches = self._AFE.get_host_attribute(
            constants.CRASHLOGS_HOST_ATTRIBUTE, hostname=self.hostname)
    assert len(matches) < 2
    if matches:
        return matches[0].value
    return None
1219
1220
def _collect_crashlogs(self, job_id):
    """Grab logs from the host where a job was crashed.

    If PRIOR_LOGS_DIR exists on the host, the logs stashed there are
    collected.  Otherwise, if the lab-machine marker (_LAB_MACHINE_FILE)
    exists, the host was repaired automatically and the normal system
    logs are collected.  If neither exists, nothing is collected.

    In every case a marker file holding the job id is then written into
    the crash log directory, the need_crash_logs host attribute is
    cleared, and (if it was found) PRIOR_LOGS_DIR is removed from the
    host.

    @param job_id: Id of the job that was crashed.
    """
    crashlogs_dir = crashcollect.get_crashinfo_dir(
            self, constants.CRASHLOGS_DEST_DIR_PREFIX)

    had_prior_logs = self.path_exists(client_constants.PRIOR_LOGS_DIR)
    if had_prior_logs:
        self._collect_prior_logs(crashlogs_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(crashlogs_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # Crash collection is deliberately a one-time effort.
    # _collect_prior_logs() and _collect_system_logs() do not throw, so
    # the code below runs even when they fail.  We only get here when
    # the host is up (see try_collect_crashlogs()), where those methods
    # are assumed to fail rarely, and it is unclear how often a retry
    # could be attempted without triggering another repair needlessly.

    # Create the marker file as soon as log collection is done; it
    # carries the job id for gs_offloader to consume.
    marker_path = os.path.join(crashlogs_dir, constants.CRASHLOGS_MARKER)
    with open(marker_path, 'a') as marker:
        marker.write('%s\n' % job_id)

    # Remove need_crash_logs attribute
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if not had_prior_logs:
        return
    logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                  client_constants.PRIOR_LOGS_DIR, self.hostname)
    self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
    # Wait for a few seconds to make sure the prior command is
    # done deep through storage.
    time.sleep(self._SAFE_WAIT_SECS)
1277
1278
def _collect_prior_logs(self, crashlogs_dir):
    """Collect the logs found under PRIOR_LOGS_DIR on the host.

    Collection is best effort: any exception raised while collecting is
    logged and swallowed, so the caller always continues.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    prior_logs_dir = client_constants.PRIOR_LOGS_DIR
    logging.debug('%s: Found %s, collecting them...', prefix, prior_logs_dir)
    try:
        self.collect_logs(prior_logs_dir, crashlogs_dir, False)
        logging.debug('%s: %s is collected', prefix, prior_logs_dir)
    except Exception as err:
        logging.error('%s: Failed to collect %s: %s',
                      prefix, prior_logs_dir, err)
1295
1296
def _collect_system_logs(self, crashlogs_dir):
    """Collect the system logs listed in _LOGS_TO_COLLECT_FILE.

    Every listed path is handled independently: a path that does not
    exist on the host is skipped, and a failure on one path is logged
    without stopping collection of the remaining paths.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    logging.debug('%s: Found %s, collecting system logs...',
                  prefix, self._LAB_MACHINE_FILE)
    log_paths = server_utils.parse_simple_config(self._LOGS_TO_COLLECT_FILE)
    for log_path in log_paths:
        try:
            if not self.path_exists(log_path):
                continue
            logging.debug('%s: Collecting %s...', prefix, log_path)
            target = server_utils.concat_path_except_last(
                    crashlogs_dir, log_path)
            self.collect_logs(log_path, target, False)
            logging.debug('%s: %s is collected', prefix, log_path)
        except Exception as err:
            logging.error('%s: Failed to collect %s: %s',
                          prefix, log_path, err)
1318
1319
def close(self):
    """Disconnect every known RPC proxy, then close the host.

    The RPC connections are torn down first; the parent class close()
    runs afterwards.
    """
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001323
1324
def get_power_supply_info(self):
    """Run power_supply_info on the host and parse its output.

    power_supply_info prints one section per power supply, e.g.,
        Device: Line Power
          online: no
          type: Mains
        Device: Battery
          state: Discharging
          percentage: 95.9276

    Each `Device:` line starts a new section; the `key: value` lines
    that follow belong to that device.  Lines that do not split into
    exactly two fields on ':' are ignored.

    @return: A dictionary keyed by device name whose values are
             dictionaries of that device's details, e.g.,
             {'Line Power': {'online': 'yes', 'type': 'main'},
              'Battery': {'vendor': 'xyz', 'percentage': '100'}}
    """
    output = self.run('power_supply_info').stdout.strip()
    devices = {}
    current_name = None
    current_details = {}
    for raw_line in output.split('\n'):
        fields = [field.strip() for field in raw_line.split(':')]
        if len(fields) != 2:
            continue
        key, value = fields
        if key != 'Device':
            current_details[key] = value
            continue
        # A new section begins: store the one that just ended.
        if current_name:
            devices[current_name] = current_details
        current_name = value
        current_details = {}
    # The last section has no following `Device:` line to flush it.
    if current_name and current_name not in devices:
        devices[current_name] = current_details
    return devices
1366
1367
def get_battery_percentage(self):
    """Get the battery percentage.

    @return: The battery level as a float, value range from 0-100.
             Return None if the battery info cannot be retrieved or the
             reported percentage is not a number.
    """
    try:
        info = self.get_power_supply_info()
        logging.info(info)
        return float(info['Battery']['percentage'])
    except (KeyError, ValueError):
        # KeyError: no battery entry (or no percentage) was reported.
        # ValueError: the reported percentage is not numeric.
        return None
1380
1381
def is_ac_connected(self):
    """Check if the dut has power adapter connected and charging.

    Reads the 'online' field of the 'Line Power' device reported by
    get_power_supply_info().

    @return: True if that field is 'yes'; False otherwise, including
             when the device or the field is missing.
    """
    try:
        line_power = self.get_power_supply_info()['Line Power']
        online = line_power['online']
    except KeyError:
        return False
    return online == 'yes'
1392
1393
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE record of this host carries the
    _RPM_OUTLET_CHANGED attribute.  Otherwise the outlet is powered on
    and the attribute is cleared.  When powering on fails, the failure
    is logged and posted as metadata, and the exception is re-raised
    only if the battery level is known and below 50%.

    @raises rpm_client.RemotePowerException: if power could not be
            turned on and the battery level is known to be below 50%.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts or self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return
    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
        afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                               hostname=self.hostname)
    except rpm_client.RemotePowerException:
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
        es_utils.ESMetadata().post(
                type_str='RPM_poweron_failure',
                metadata={'hostname': self.hostname})

        battery_percentage = self.get_battery_percentage()
        # Compare against None explicitly: a reading of 0 is a real
        # (and the most critical) battery level, not "unknown".
        if battery_percentage is not None and battery_percentage < 50:
            raise
        elif self.is_ac_connected():
            logging.info('The device has power adapter connected and '
                         'charging. No need to try to turn RPM on '
                         'again.')
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        logging.info('Battery level is now at %s%%. The device may '
                     'still have enough power to run test, so no '
                     'exception will be raised.', battery_percentage)
1426
Simran Basi5e6339a2013-03-21 11:34:32 -07001427
beepsc87ff602013-07-31 21:53:00 -07001428 def _is_factory_image(self):
1429 """Checks if the image on the DUT is a factory image.
1430
1431 @return: True if the image on the DUT is a factory image.
1432 False otherwise.
1433 """
1434 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1435 return result.exit_status == 0
1436
1437
def _restart_ui(self):
    """Restart the Chrome UI and wait for Chrome to be ready again.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException(
                'Cannot restart ui on factory images')

    # TODO(jrbarnette): The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).
    #
    # The login prompt state is captured before the restart and handed
    # to the wait call afterwards.
    prompt_before_restart = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(prompt_before_restart, self)
beepsc87ff602013-07-31 21:53:00 -07001457
1458
def cleanup(self):
    """Clean up the host.

    Removes CLEANUP_LOGS_PAUSED_FILE, then restarts the UI.  If the UI
    cannot be restarted, falls back to the parent class cleanup().
    Finally, for hosts that have power control, makes sure the power
    is back on.
    """
    self.run('rm -f %s' % client_constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Restarting the UI failed, so fall back to the normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001472
1473
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds.  This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    Unless the caller supplies them, this fills in `reboot_cmd` (a
    plain reboot followed, `reboot_timeout` seconds later, by a forced
    one) and `fastsync`.  It always sets `board` before delegating to
    the parent class.

    @param dargs: Keyword arguments forwarded to the parent reboot().
    """
    if 'reboot_cmd' not in dargs:
        force_delay = dargs.get('reboot_timeout', 10)
        dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
                               ' </dev/null >/dev/null 2>&1 &)' %
                               force_delay)
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times:
    # Get the board name i.e. 'daisy_spring', dropping everything up to
    # and including the first ':' of the full board name.
    board_fullname = self.get_board()
    dargs['board'] = board_fullname.split(':', 1)[-1]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001498
1499
def suspend(self, **dargs):
    """Suspend the site host.

    `suspend_time` (default 60) is used both as the `timeout` passed
    to the parent class and as the RTC wake alarm offset.  Unless the
    caller supplies `suspend_cmd`, a command is built that clears the
    wake alarm, programs a new one, and asks powerd to suspend.

    @param dargs: Keyword arguments forwarded to the parent suspend().
    """
    wake_after = dargs.get('suspend_time', 60)
    dargs['timeout'] = wake_after
    if 'suspend_cmd' not in dargs:
        steps = ['echo 0 > /sys/class/rtc/rtc0/wakealarm',
                 'echo +%d > /sys/class/rtc/rtc0/wakealarm' % wake_after,
                 'powerd_dbus_suspend --delay=0 &']
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' %
                                ' && '.join(steps))
    super(CrosHost, self).suspend(**dargs)
1513
1514
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    status_cmd = 'status %s | grep start/running' % service_name
    # grep exits non-zero when nothing matches.  Ignore the exit status
    # so that a service that is not running yields False instead of an
    # exception from run().
    result = self.run(status_cmd, ignore_status=True)
    return result.stdout.strip() != ''
1524
1525
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Verification is refused while crash-logs still need to be
    collected from the host.  Otherwise tests for the following
    conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient inodes in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition.
     4. Sufficient space in /mnt/stateful_partition/encrypted, when
        that path exists.
     5. The system-services upstart job is running.
     6. update_engine answers a simple status request (skipped on
        factory images).
     7. python on the host can import cPickle.

    @raises error.AutoservCrashLogCollectRequired: if crash-logs must
            be collected before the host can be verified.
    @raises error.AutoservError: if system-services is not running.
    """
    # Check if a job was crashed on this host.
    # If yes, avoid verification until crash-logs are collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()

    stateful_path = '/mnt/stateful_partition'
    get_config = global_config.global_config.get_config_value
    self.check_inodes(
            stateful_path,
            get_config('SERVER', 'kilo_inodes_required', type=int,
                       default=100))
    self.check_diskspace(
            stateful_path,
            get_config('SERVER', 'gb_diskspace_required', type=float,
                       default=20.0))

    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    # Not all targets build with encrypted stateful support.
    if self.path_exists(encrypted_stateful_path):
        self.check_diskspace(
                encrypted_stateful_path,
                get_config('SERVER', 'gb_encrypted_diskspace_required',
                           type=float, default=0.1))

    if not self.upstart_status('system-services'):
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')

    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001575
1576
def verify_hardware(self):
    """Verify hardware system of a Chrome OS system.

    Currently this only logs the following hardware conditions; it
    does not fail on any of them:
    1. Battery level.
    2. Is power adapter connected.
    """
    battery = self.get_battery_percentage()
    logging.info('Battery percentage: %s', battery)
    if self.is_ac_connected():
        adapter_state = 'has'
    else:
        adapter_state = 'does not have'
    logging.info('Device %s power adapter connected and charging.',
                 adapter_state)
1587
1588
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds.  In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line, without the target host name.
    """
    template = ('/usr/bin/ssh -a -x %s %s %s'
                ' -o StrictHostKeyChecking=no'
                ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                ' -o Protocol=2 -l %s -p %d')
    substitutions = (self._ssh_verbosity_flag, self._ssh_options,
                     opts, user, port)
    return template % substitutions
Fang Deng96667ca2013-08-01 17:46:18 -07001629
1630
beeps32a63082013-08-22 14:02:29 -07001631 def _create_ssh_tunnel(self, port, local_port):
1632 """Create an ssh tunnel from local_port to port.
1633
1634 @param port: remote port on the host.
1635 @param local_port: local forwarding port.
1636
1637 @return: the tunnel process.
1638 """
1639 # Chrome OS on the target closes down most external ports
1640 # for security. We could open the port, but doing that
1641 # would conflict with security tests that check that only
1642 # expected ports are open. So, to get to the port on the
1643 # target we use an ssh tunnel.
1644 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
1645 ssh_cmd = self.make_ssh_command(opts=tunnel_options)
1646 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
1647 logging.debug('Full tunnel command: %s', tunnel_cmd)
1648 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
1649 logging.debug('Started ssh tunnel, local = %d'
1650 ' remote = %d, pid = %d',
1651 local_port, port, tunnel_proc.pid)
1652 return tunnel_proc
1653
1654
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port.  As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image.  It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port.  If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, if known.  It
                       is recorded with the connection so that
                       rpc_disconnect() can wait for that process to
                       go away.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop any older connection to the same remote port first.
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    self._rpc_proxy_map[port] = (command_name, tunnel, remote_pid)
    return self._RPC_PROXY_URL % forwarded_port
1690
1691
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready, and the connection is torn down again if the
    server isn't ready by timeout_seconds.
    NOTE(review): the exception raised on timeout comes from
    retry.retry; its exact type is not visible here - confirm.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'
    @param logfile Logfile to send output when running
        'command' argument.

    @return The XMLRPC client proxy for the server.
    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.  `echo $!` reports the pid of the background server.
    launch_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    server_pid = self.run(launch_cmd).stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, server_pid)

    # Tunnel through SSH to be able to reach that remote port.
    tunnel_url = self._setup_rpc(port, command_name, remote_pid=server_pid)
    proxy = xmlrpclib.ServerProxy(tunnel_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        server_is_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_is_ready = True
        finally:
            # Runs for any exception type: never leave a tunnel (or a
            # half-started server) behind when the readiness check
            # does not complete.
            if not server_is_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001776
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001777
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    The tag and the message are shell-quoted before being handed to
    `logger`, so quotes, `$` and backticks in them are logged
    literally instead of being interpreted by the remote shell.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    try:
        from shlex import quote
    except ImportError:
        # Python 2 provides the same helper in the pipes module.
        from pipes import quote
    # '%s' % (x,) coerces non-string arguments to text first.
    quoted_tag = quote('%s' % (tag,))
    quoted_message = quote('%s' % (message,))
    self.run('logger -t %s %s' % (quoted_tag, quoted_message))
1786
1787
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller.  Unlike
    xmlrpc_connect, this method does not facilitate the creation of a remote
    jsonrpc server, as the only clients of this code are factory tests,
    for which the goofy system manager is built in to the image and starts
    when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening.  However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is not available.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    make_proxy = jsonrpclib.jsonrpc.ServerProxy
    # No remote process name is registered for the connection, so
    # rpc_disconnect() will only close the tunnel.
    proxy = make_proxy(self._setup_rpc(port, None))

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1818
1819
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @raises error.TestError if the remote server is still running
            after _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    if port not in self._rpc_proxy_map:
        return
    server_name, tunnel, server_pid = self._rpc_proxy_map[port]
    if server_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % server_name, ignore_status=True)
        if server_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         server_name)
            wait_began = time.time()
            # Poll until the recorded pid is no longer among the
            # processes matching the server name.
            while (time.time() - wait_began <
                   self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                pgrep = self.run("pgrep -f '%s'" % server_name,
                                 ignore_status=True)
                if server_pid not in pgrep.stdout.split():
                    logging.info('Shut down RPC server.')
                    break
                time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
            else:
                # The loop ran out of time without seeing the pid go.
                raise error.TestError('Failed to shutdown RPC server %s' %
                                      server_name)

    tunnel_is_running = tunnel.poll() is None
    if tunnel_is_running:
        tunnel.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel.pid, tunnel.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001874
1875
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() deletes entries from _rpc_proxy_map, so iterate
    # over a snapshot of the ports rather than over the map itself.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001880
1881
def poor_mans_rpc(self, fun):
    """
    Builds a shell script that calls a function from client utils.

    The script changes into the bin directory of the installed
    autotest client and runs python to print the result of utils.<fun>.
    NOTE(review): the script is only built and returned here, it is
    not executed; callers presumably run it on the host - confirm.

    @param fun function call expression in client utils namespace.
    @return the shell script, as a string.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    return ''.join(['cd %s/bin; ' % autodir,
                    'python -c "import common; import utils;',
                    'print utils.%s"' % fun])
1893
1894
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    # utils.ping() returning 0 is treated as "host answered".
    host_answered = utils.ping(self.hostname, tries=1, deadline=1) == 0
    # XOR is true when the two differ, so negate it to test agreement.
    return not (status ^ host_answered)
1905
1906 def _ping_wait_for_status(self, status, timeout):
1907 """Wait for the host to have a given status (UP or DOWN).
1908
1909 Status is checked by polling. Polling will not last longer
1910 than the number of seconds in `timeout`. The polling
1911 interval will be long enough that only approximately
1912 _PING_WAIT_COUNT polling cycles will be executed, subject
1913 to a maximum interval of about one minute.
1914
1915 @param status Waiting will stop immediately if `ping` of the
1916 host returns this status.
1917 @param timeout Poll for at most this many seconds.
1918 @return True iff the host status from `ping` matched the
1919 requested status at the time of return.
1920
1921 """
1922 # _ping_check_status() takes about 1 second, hence the
1923 # "- 1" in the formula below.
1924 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1925 end_time = time.time() + timeout
1926 while time.time() <= end_time:
1927 if self._ping_check_status(status):
1928 return True
1929 if poll_interval > 0:
1930 time.sleep(poll_interval)
1931
1932 # The last thing we did was sleep(poll_interval), so it may
1933 # have been too long since the last `ping`. Check one more
1934 # time, just to be sure.
1935 return self._ping_check_status(status)
1936
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001952
def ping_wait_down(self, timeout):
    """Block until the host stops answering `ping`.

    This can serve as a slightly faster alternative to
    `wait_down()`, because it avoids potentially long ssh
    timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host stopped answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001966
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    The "is asleep" check cannot tell a sleeping system from one
    that is powered off; to confirm that the unit really was
    asleep, force a resume and then call
    `test_wait_for_resume()`.

    A test is expected to call this as part of a sequence like
    the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host
                         to sleep; SLEEP_TIMEOUT is used when this
                         is None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    limit = self.SLEEP_TIMEOUT if sleep_timeout is None else sleep_timeout
    went_down = self.ping_wait_down(timeout=limit)
    if went_down:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' % limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001997
1998
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host entered sleep mode.  A `TestFail` exception is
    raised if the boot id has changed.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host
                          to come up; RESUME_TIMEOUT is used when
                          this is None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    limit = resume_timeout
    if limit is None:
        limit = self.RESUME_TIMEOUT

    if not self.wait_up(timeout=limit):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                limit)

    # The host is back; an unchanged boot id proves it resumed
    # rather than rebooted.
    current_boot_id = self.get_boot_id()
    if current_boot_id == old_boot_id:
        return
    raise error.TestFail(
            'client rebooted, but sleep was expected'
            ' (old boot %s, new boot %s)'
            % (old_boot_id, current_boot_id))
2033
2034
def test_wait_for_shutdown(self, shutdown_timeout=None):
    """Wait for the client to shut down.

    The "has shut down" check cannot tell a powered-off system
    from one that is merely asleep; to confirm that the unit was
    down, force a boot and then call test_wait_for_boot().

    A test is expected to call this as part of a sequence like
    the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @param shutdown_timeout time limit in seconds to allow the
                            host to go down; SHUTDOWN_TIMEOUT is
                            used when this is None.
    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    limit = (self.SHUTDOWN_TIMEOUT if shutdown_timeout is None
             else shutdown_timeout)
    went_down = self.ping_wait_down(timeout=limit)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002064
2065
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the shut down.  A `TestFail` exception is raised if the
    boot id has not changed.  The boot id comparison is skipped
    when `old_boot_id` is not supplied.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_up:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)
    if not old_boot_id:
        # No reference boot id was given, so there is nothing to compare.
        return
    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07002095
2096
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    @param hostname: hostname to check for rpm support.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab, or if it sits in one of
            the lab locations for which RPM support is temporarily
            disabled below. Otherwise the regular expression
            MatchObject for the hostname is returned.
    """
    match = re.match(CrosHost._RPM_HOSTNAME_REGEX, hostname)
    if not match:
        return None
    try:
        lab = int(match.group(1))
        row = int(match.group(3))
        rack = int(match.group(4))
    except (TypeError, ValueError):
        # A group that is missing or not numeric means the location
        # cannot be checked against the exclusions; report the match.
        return match
    if lab == 2 and 1 <= row <= 5 and 1 <= rack <= 7:
        # TODO(fdeng): temporarily disable support for duts
        # behind hydra2 in chromeos2, remove once
        # b/17612645 is fixed.
        return None
    if lab == 4 and (rack == 0 or row == 13):
        # TODO(fdeng): disable support for duts behind hydra3
        # for chromeos4, remove once b/15410667 is fixed
        return None
    return match
Simran Basid5e5e272012-09-24 15:23:59 -07002126
2127
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return the result of `check_for_rpm_support()` for this host's
            hostname: a (truthy) regular expression MatchObject when
            RPM support is detected, otherwise None.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07002135
2136
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08002137 def _set_power(self, state, power_method):
2138 """Sets the power to the host via RPM, Servo or manual.
2139
2140 @param state Specifies which power state to set to DUT
2141 @param power_method Specifies which method of power control to
2142 use. By default "RPM" will be used. Valid values
2143 are the strings "RPM", "manual", "servoj10".
2144
2145 """
2146 ACCEPTABLE_STATES = ['ON', 'OFF']
2147
2148 if state.upper() not in ACCEPTABLE_STATES:
2149 raise error.TestError('State must be one of: %s.'
2150 % (ACCEPTABLE_STATES,))
2151
2152 if power_method == self.POWER_CONTROL_SERVO:
2153 logging.info('Setting servo port J10 to %s', state)
2154 self.servo.set('prtctl3_pwren', state.lower())
2155 time.sleep(self._USB_POWER_TIMEOUT)
2156 elif power_method == self.POWER_CONTROL_MANUAL:
2157 logging.info('You have %d seconds to set the AC power to %s.',
2158 self._POWER_CYCLE_TIMEOUT, state)
2159 time.sleep(self._POWER_CYCLE_TIMEOUT)
2160 else:
2161 if not self.has_power():
2162 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07002163 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
2164 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
2165 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07002166 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002167
2168
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002178
2179
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
2189
2190
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is performed as an
    explicit power_off(), a pause of _POWER_CYCLE_TIMEOUT seconds,
    and a power_on().  Any other method sends a single 'CYCLE'
    request to the RPM.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return
    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002206
2207
def get_platform(self):
    """Determine the correct platform label for this host.

    @returns a string representing this host's platform.
    """
    cros_sys = utils.Crossystem(self)
    cros_sys.init()
    # The fwid generally has the form {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the leading part is
    # used as the platform id.
    fwid = cros_sys.fwid()
    platform_id = fwid.split('.')[0].lower()
    # Newer platforms carry a 'Google_' prefix that older ones lack.
    return platform_id.replace('google_', '')
2221
2222
def get_architecture(self):
    """Determine the correct architecture label for this host.

    @returns the value reported by crossystem's arch() for this host.
    """
    cros_sys = utils.Crossystem(self)
    cros_sys.init()
    architecture = cros_sys.arch()
    return architecture
2231
2232
def get_chrome_version(self):
    """Gets the Chrome version number and milestone as strings.

    Runs the Chrome version command on the host and hands its output
    to utils.parse_chrome_version().

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
        current Chrome version number as a string (in the form "W.X.Y.Z")
        and "milestone" is the first component of the version number
        (the "W" from "W.X.Y.Z").  If the version number cannot be parsed
        in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
        of "chrome --version" and the milestone will be the empty string.

    """
    command = client_constants.CHROME_VERSION_COMMAND
    raw_output = self.run(command).stdout
    return utils.parse_chrome_version(raw_output)
2248
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    @returns a string representing this host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    raw_board = lsb_release['CHROMEOS_RELEASE_BOARD']
    # Lab devices generally report the plain board name, but development
    # devices report {board_name}-signed-{key_type}.  A leading 'x86-'
    # is part of the board name and must be kept, so two dash-separated
    # pieces are retained in that case instead of one.
    label_format = ds_constants.BOARD_PREFIX + '%s'
    pieces = raw_board.split('-')
    kept = 1 if 'x86' not in raw_board else 2
    return label_format % '-'.join(pieces[:kept])
Simran Basic6f1f7a2012-10-16 10:47:46 -07002265
2266
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    @returns a more detailed string representing the board. Examples are
    butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    rpc_command = self.poor_mans_rpc(
            'get_board_with_frequency_and_memory()')
    board = self.run(rpc_command).stdout
    # Call strip() on the value itself: the unbound str.strip() form
    # rejects output that is not exactly a `str` (e.g. unicode).
    return 'board_freq_mem:%s' % board.strip()
2278
2279
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a lightsensor.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            self._LIGHTSENSOR_SEARCH_DIR,
            '|'.join(self._LIGHTSENSOR_FILES))
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with status 1: none of the candidate
        # lightsensor files exist.
        return None
    return 'lightsensor'
2299
2300
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # `test` exited with status 1: the directory does not exist.
        return None
    # `test` exited with status 0.
    return 'bluetooth'
2316
2317
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    @returns a string representing the gpu family. Examples are mali, tegra,
    pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    rpc_command = self.poor_mans_rpc('get_gpu_family()')
    gpu_family = self.run(rpc_command).stdout
    # Call strip() on the value itself: the unbound str.strip() form
    # rejects output that is not exactly a `str` (e.g. unicode).
    return 'gpu_family:%s' % gpu_family.strip()
2328
2329
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2345
2346
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) don't return any strings, since no tests depend on
    those.
    """
    cmd = 'mosys ec info'
    # The last field of the output is expected to follow the Chrome OS
    # EC version scheme, as in:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Output from non-Chrome OS ECs looks like:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # Some systems have no EC at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    found = re.search(regexp, ecinfo.stdout)
    if not found:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", found.group(1))
    return 'ec:cros'
2384
2385
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or None, if no accelerometers.
    """
    # ectool must be installed for any of the later probes to work.
    ectool_lookup = self.run('which ectool', ignore_status=True)
    if ectool_lookup.exit_status:
        logging.info("No ectool cmd found, assuming no EC accelerometers")
        return None

    # The EC has to understand the motionsense command.
    motionsense = self.run('ectool motionsense', ignore_status=True)
    if motionsense.exit_status:
        logging.info("EC does not support motionsense command "
                     "assuming no EC accelerometers")
        return None

    # The first output line reports whether motion sensing is active.
    active_output = self.run('ectool motionsense active').stdout
    first_line = active_output.split('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2416
2417
@label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
             has a Chameleon or None if it has not.
    """
    if not self._chameleon_host:
        return None
    specific_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', specific_label]
2430
2431
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when mosys reports the type as 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # Not every platform ships the mosys psu command.  When it is
        # missing the device is assumed to have a battery.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2456
2457
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = r'/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or something
        # else. We can assume that MMC is always an internal device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        type_result = self.run(command=type_cmd, ignore_status=True)
        if type_result.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = type_result.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = r'/dev/sd[a-z]+'
    # Match against the stripped device path, exactly as the MMC check
    # above does, so stray leading whitespace cannot hide a SCSI device.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2530
2531
@label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    A host with a working servo attached should carry a servo label.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2542
2543
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect; an
             empty list if running the commands fails.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
        # Raw string so that \w is a regex class, not a string escape.
        return re.findall(r'^Detected label: (\w+)$', result, re.M)
    except error.AutoservRunError:
        # The tool is not installed.
        return []
2565
2566
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """ Determine if a board under test is supported for video glitch
    detection tests.

    The board is supported when its name (without the board label
    prefix) is a section of client/cros/video/device_spec.conf.

    @return: 'video_glitch_detection' if board is supported, None otherwise.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
            common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
        supported_boards = parser.sections()

        return 'video_glitch_detection' if dut in supported_boards else None

    except ConfigParser.Error:
        # something went wrong while parsing the conf file.  The base
        # exception class is `Error`; the module has no `error` name.
        return None
2589
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
        depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    # A device kind is present when inputcontrol prints anything for it.
    return [kind for kind in ['touchpad', 'touchscreen']
            if self.run(input_cmd % kind).stdout]
2604
2605
mussa584b4462014-06-20 15:13:28 -07002606
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions.

    A label function may return a single string, a list of strings, or
    a false value for "no label".  A label function that raises is
    logged and skipped so that it cannot prevent the remaining labels
    from being collected.

    @return A flat list of all the labels that were detected.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            label = None
        if not label:
            continue
        # isinstance() rather than an exact type() comparison, so that
        # subclasses of str and list are not silently dropped.
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, list):
            labels.extend(label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002628
2629
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    The root device reported by `rootdev -s -d` is looked up under
    /sys/block and its `removable` flag is read.

    @return: True if DUT is boot from usb.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    flag_cmd = ('cat /sys/block/%s/removable' %
                os.path.basename(root_device))
    removable_flag = self.run(flag_cmd).stdout.strip()
    return int(removable_flag) == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002639
2640
def read_from_meminfo(self, key):
    """Return the memory info from /proc/meminfo

    @param key: meminfo requested

    @return the numeric value of the first matching /proc/meminfo
            line, as an int.

    """
    meminfo = self.run('grep %s /proc/meminfo' % key).stdout.strip()
    logging.debug('%s', meminfo)
    # Take the number that follows the "Key:" separator.  Grabbing the
    # first digit run of the whole line would return a digit from the
    # key name itself for keys such as DirectMap4k.
    match = re.search(r':\s*(\d+)', meminfo)
    if match is None:
        # No "Key: value" shape; fall back to the first digit run.
        match = re.search(r'(\d+)', meminfo)
    return int(match.group(1))