blob: 7ab0cf699ae62b4dd6a674f0762e719b52eb5ba8 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Dan Shi7dca56e2014-11-11 17:07:56 -080024from autotest_lib.client.common_lib.cros.graphite import es_utils
Michael Liangda8c60a2014-06-03 13:24:51 -070025from autotest_lib.client.common_lib.cros.graphite import stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
MK Ryu35d661e2014-09-25 17:44:10 -070028from autotest_lib.server import autoserv_parser
29from autotest_lib.server import autotest
30from autotest_lib.server import constants
31from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070032from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070033from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050034from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070035from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070036from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070037from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080038from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070039from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080040from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070041
42
beeps32a63082013-08-22 14:02:29 -070043try:
44 import jsonrpclib
45except ImportError:
46 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070047
Fang Dengd1c2b732013-08-20 12:59:46 -070048
beepsc87ff602013-07-31 21:53:00 -070049class FactoryImageCheckerException(error.AutoservError):
50 """Exception raised when an image is a factory image."""
51 pass
52
53
Aviv Keshet74c89a92013-02-04 15:18:30 -080054def add_label_detector(label_function_list, label_list=None, label=None):
55 """Decorator used to group functions together into the provided list.
56 @param label_function_list: List of label detecting functions to add
57 decorated function to.
58 @param label_list: List of detectable labels to add detectable labels to.
59 (Default: None)
60 @param label: Label string that is detectable by this detection function
61 (Default: None)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080062 """
Simran Basic6f1f7a2012-10-16 10:47:46 -070063 def add_func(func):
Aviv Keshet74c89a92013-02-04 15:18:30 -080064 """
65 @param func: The function to be added as a detector.
66 """
67 label_function_list.append(func)
68 if label and label_list is not None:
69 label_list.append(label)
Simran Basic6f1f7a2012-10-16 10:47:46 -070070 return func
71 return add_func
72
73
Fang Deng0ca40e22013-08-27 17:47:44 -070074class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070075 """Chromium OS specific subclass of Host."""
76
77 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050078 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070079
Richard Barnette03a0c132012-11-05 12:40:35 -080080 # Timeout values (in seconds) associated with various Chrome OS
81 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070082 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -080083 # In general, a good rule of thumb is that the timeout can be up
84 # to twice the typical measured value on the slowest platform.
85 # The times here have not necessarily been empirically tested to
86 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070087 #
88 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -080089 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
90 # time to restart the netwowrk.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080091 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070092 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -080093 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -080094 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070095 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -080096 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -080097 # network.
beepsf079cfb2013-09-18 17:49:51 -070098 # INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnette84890bd2014-02-21 11:05:47 -080099 # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
100 # includes powerwash.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700101
102 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800103 RESUME_TIMEOUT = 10
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +0800104 SHUTDOWN_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700105 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700106 USB_BOOT_TIMEOUT = 150
J. Richard Barnette7817b052014-08-28 09:47:29 -0700107 INSTALL_TIMEOUT = 480
Dan Shi2c88eed2013-11-12 10:18:38 -0800108 POWERWASH_BOOT_TIMEOUT = 60
Chris Sosab76e0ee2013-05-22 16:55:41 -0700109
J. Richard Barnette84890bd2014-02-21 11:05:47 -0800110 # REBOOT_TIMEOUT: How long to wait for a reboot.
111 #
Chris Sosab76e0ee2013-05-22 16:55:41 -0700112 # We have a long timeout to ensure we don't flakily fail due to other
113 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
Simran Basi1160e2c2013-10-04 16:00:24 -0700114 # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
115 # return from reboot' bug is solved.
116 REBOOT_TIMEOUT = 480
Chris Sosab76e0ee2013-05-22 16:55:41 -0700117
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800118 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
119 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
120 _USB_POWER_TIMEOUT = 5
121 _POWER_CYCLE_TIMEOUT = 10
122
beeps32a63082013-08-22 14:02:29 -0700123 _RPC_PROXY_URL = 'http://localhost:%d'
Christopher Wileydd181852013-10-10 19:56:58 -0700124 _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
Peter Qiu4410db72014-06-05 10:32:41 -0700125 # Set shutdown timeout to account for the time for restarting the UI.
126 _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800127
Richard Barnette82c35912012-11-20 10:09:10 -0800128 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
129 'rpm_recovery_boards', type=str).split(',')
130
131 _MAX_POWER_CYCLE_ATTEMPTS = 6
132 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
Fang Dengdeba14f2014-11-14 11:54:09 -0800133 _RPM_HOSTNAME_REGEX = ('chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
134 '-host(\d+)')
Gwendal Grignoua66f1d12014-12-03 10:07:26 -0800135 _LIGHT_SENSOR_FILES = [ "in_illuminance0_input",
136 "in_illuminance_input",
137 "in_illuminance0_raw",
138 "in_illuminance_raw",
139 "illuminance0_input"]
Richard Barnette82c35912012-11-20 10:09:10 -0800140 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
141 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800142 _DETECTABLE_LABELS = []
143 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
144 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700145
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800146 # Constants used in ping_wait_up() and ping_wait_down().
147 #
148 # _PING_WAIT_COUNT is the approximate number of polling
149 # cycles to use when waiting for a host state change.
150 #
151 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
152 # for arguments to the internal _ping_wait_for_status()
153 # method.
154 _PING_WAIT_COUNT = 40
155 _PING_STATUS_DOWN = False
156 _PING_STATUS_UP = True
157
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800158 # Allowed values for the power_method argument.
159
160 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
161 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
162 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
163 POWER_CONTROL_RPM = 'RPM'
164 POWER_CONTROL_SERVO = 'servoj10'
165 POWER_CONTROL_MANUAL = 'manual'
166
167 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
168 POWER_CONTROL_SERVO,
169 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800170
Simran Basi5e6339a2013-03-21 11:34:32 -0700171 _RPM_OUTLET_CHANGED = 'outlet_changed'
172
Dan Shi9cb0eec2014-06-03 09:04:50 -0700173 # URL pattern to download firmware image.
174 _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
175 'CROS', 'firmware_url_pattern', type=str)
beeps687243d2013-07-18 15:29:27 -0700176
MK Ryu35d661e2014-09-25 17:44:10 -0700177 # File that has a list of directories to be collected
178 _LOGS_TO_COLLECT_FILE = os.path.join(
179 common.client_dir, 'common_lib', 'logs_to_collect')
180
181 # Prefix of logging message w.r.t. crash collection
182 _CRASHLOGS_PREFIX = 'collect_crashlogs'
183
184 # Time duration waiting for host up/down check
185 _CHECK_HOST_UP_TIMEOUT_SECS = 15
186
187 # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
188 # might not be completely done deep through the hardware when the machine
189 # is powered down right after the command returns.
190 # We should wait for a few seconds to make them done. Finger crossed.
191 _SAFE_WAIT_SECS = 10
192
193
J. Richard Barnette964fba02012-10-24 17:34:29 -0700194 @staticmethod
beeps46dadc92013-11-07 14:07:10 -0800195 def check_host(host, timeout=10):
196 """
197 Check if the given host is a chrome-os host.
198
199 @param host: An ssh host representing a device.
200 @param timeout: The timeout for the run command.
201
202 @return: True if the host device is chromeos.
203
beeps46dadc92013-11-07 14:07:10 -0800204 """
205 try:
Christopher Wiley1ea80942014-02-26 16:45:08 -0800206 result = host.run('grep -q CHROMEOS /etc/lsb-release && '
Simran Basie5f7ae42014-06-26 15:44:06 -0700207 '! which adb >/dev/null 2>&1 && '
208 '! grep -q moblab /etc/lsb-release',
Christopher Wileyfc3eac02013-11-21 16:24:57 -0800209 ignore_status=True, timeout=timeout)
beeps46dadc92013-11-07 14:07:10 -0800210 except (error.AutoservRunError, error.AutoservSSHTimeout):
211 return False
212 return result.exit_status == 0
213
214
215 @staticmethod
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800216 def _extract_arguments(args_dict, key_subset):
217 """Extract options from `args_dict` and return a subset result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800218
219 Take the provided dictionary of argument options and return
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800220 a subset that represent standard arguments needed to construct
221 a test-assistant object (chameleon or servo) for a host. The
222 intent is to provide standard argument processing from
223 run_remote_tests for tests that require a test-assistant board
224 to operate.
225
226 @param args_dict Dictionary from which to extract the arguments.
227 @param key_subset Tuple of keys to extract from the args_dict, e.g.
228 ('servo_host', 'servo_port').
229 """
230 result = {}
231 for arg in key_subset:
232 if arg in args_dict:
233 result[arg] = args_dict[arg]
234 return result
235
236
237 @staticmethod
238 def get_chameleon_arguments(args_dict):
239 """Extract chameleon options from `args_dict` and return the result.
240
241 Recommended usage:
242 ~~~~~~~~
243 args_dict = utils.args_to_dict(args)
244 chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
245 host = hosts.create_host(machine, chameleon_args=chameleon_args)
246 ~~~~~~~~
247
248 @param args_dict Dictionary from which to extract the chameleon
249 arguments.
250 """
251 return CrosHost._extract_arguments(
252 args_dict, ('chameleon_host', 'chameleon_port'))
253
254
255 @staticmethod
256 def get_servo_arguments(args_dict):
257 """Extract servo options from `args_dict` and return the result.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800258
259 Recommended usage:
260 ~~~~~~~~
261 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700262 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800263 host = hosts.create_host(machine, servo_args=servo_args)
264 ~~~~~~~~
265
266 @param args_dict Dictionary from which to extract the servo
267 arguments.
268 """
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800269 return CrosHost._extract_arguments(
270 args_dict, ('servo_host', 'servo_port'))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700271
J. Richard Barnette964fba02012-10-24 17:34:29 -0700272
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800273 def _initialize(self, hostname, chameleon_args=None, servo_args=None,
274 ssh_verbosity_flag='', ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700275 *args, **dargs):
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800276 """Initialize superclasses, |self.chameleon|, and |self.servo|.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700277
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800278 This method checks whether a chameleon/servo (aka
279 test-assistant objects) is required by checking whether
280 chameleon_args/servo_args is None. This method will only
281 attempt to create the test-assistant object when it is
282 required by the test.
Fang Deng5d518f42013-08-02 14:04:32 -0700283
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800284 For creating the test-assistant object, there are three
285 possibilities: First, if the host is a lab system known to have
286 a test-assistant board, we connect to that board unconditionally.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700287 Second, if we're called from a control file that requires
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800288 test-assistant features for testing, it will pass settings from
289 the arguments, like `servo_host`, `servo_port`. If neither of
290 these cases apply, the test-assistant object will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700291
292 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700293 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700294 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700295 # self.env is a dictionary of environment variable settings
296 # to be exported for commands run on the host.
297 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
298 # errors that might happen.
299 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700300 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700301 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700302 self._ssh_options = ssh_options
Fang Deng5d518f42013-08-02 14:04:32 -0700303 # TODO(fdeng): We need to simplify the
304 # process of servo and servo_host initialization.
305 # crbug.com/298432
Dan Shi4d478522014-02-14 13:46:32 -0800306 self._servo_host = servo_host.create_servo_host(dut=self.hostname,
307 servo_args=servo_args)
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800308 # TODO(waihong): Do the simplication on Chameleon too.
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800309 self._chameleon_host = chameleon_host.create_chameleon_host(
310 dut=self.hostname, chameleon_args=chameleon_args)
311
Dan Shi4d478522014-02-14 13:46:32 -0800312 if self._servo_host is not None:
313 self.servo = self._servo_host.get_servo()
314 else:
315 self.servo = None
316
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800317 if self._chameleon_host:
Tom Wai-Hong Tameaee3402014-01-22 08:52:10 +0800318 self.chameleon = self._chameleon_host.create_chameleon_board()
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +0800319 else:
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +0800320 self.chameleon = None
Fang Deng5d518f42013-08-02 14:04:32 -0700321
322
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500323 def get_repair_image_name(self):
324 """Generate a image_name from variables in the global config.
325
326 @returns a str of $board-version/$BUILD.
327
328 """
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500329 board = self._get_board_from_afe()
330 if board is None:
331 raise error.AutoservError('DUT has no board attribute, '
332 'cannot be repaired.')
Dan Shi6964fa52014-12-18 11:04:27 -0800333 stable_version = self._AFE.run('get_stable_version', board=board)
334 build_pattern = global_config.global_config.get_config_value(
335 'CROS', 'stable_build_pattern')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500336 return build_pattern % (board, stable_version)
337
338
Scott Zawalski62bacae2013-03-05 10:40:32 -0500339 def _host_in_AFE(self):
340 """Check if the host is an object the AFE knows.
341
342 @returns the host object.
343 """
344 return self._AFE.get_hosts(hostname=self.hostname)
345
346
Chris Sosab76e0ee2013-05-22 16:55:41 -0700347 def lookup_job_repo_url(self):
348 """Looks up the job_repo_url for the host.
349
350 @returns job_repo_url from AFE or None if not found.
351
352 @raises KeyError if the host does not have a job_repo_url
353 """
354 if not self._host_in_AFE():
355 return None
356
357 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700358 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
359 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700360
361
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500362 def clear_cros_version_labels_and_job_repo_url(self):
363 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500364 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400365 return
366
Scott Zawalski62bacae2013-03-05 10:40:32 -0500367 host_list = [self.hostname]
368 labels = self._AFE.get_labels(
369 name__startswith=ds_constants.VERSION_PREFIX,
370 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800371
Scott Zawalski62bacae2013-03-05 10:40:32 -0500372 for label in labels:
373 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500374
beepscb6f1e22013-06-28 19:14:10 -0700375 self.update_job_repo_url(None, None)
376
377
378 def update_job_repo_url(self, devserver_url, image_name):
379 """
380 Updates the job_repo_url host attribute and asserts it's value.
381
382 @param devserver_url: The devserver to use in the job_repo_url.
383 @param image_name: The name of the image to use in the job_repo_url.
384
385 @raises AutoservError: If we failed to update the job_repo_url.
386 """
387 repo_url = None
388 if devserver_url and image_name:
389 repo_url = tools.get_package_url(devserver_url, image_name)
390 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500391 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700392 if self.lookup_job_repo_url() != repo_url:
393 raise error.AutoservError('Failed to update job_repo_url with %s, '
394 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500395
396
Dan Shie9309262013-06-19 22:50:21 -0700397 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400398 """Add cros_version labels and host attribute job_repo_url.
399
400 @param image_name: The name of the image e.g.
401 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700402
Scott Zawalskieadbf702013-03-14 09:23:06 -0400403 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500404 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400405 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500406
Scott Zawalskieadbf702013-03-14 09:23:06 -0400407 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700408 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500409
410 labels = self._AFE.get_labels(name=cros_label)
411 if labels:
412 label = labels[0]
413 else:
414 label = self._AFE.create_label(name=cros_label)
415
416 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700417 self.update_job_repo_url(devserver_url, image_name)
418
419
beepsdae65fd2013-07-26 16:24:41 -0700420 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700421 """
422 Make sure job_repo_url of this host is valid.
423
joychen03eaad92013-06-26 09:55:21 -0700424 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700425 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
426 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
427 download and extract it. If the devserver embedded in the url is
428 unresponsive, update the job_repo_url of the host after staging it on
429 another devserver.
430
431 @param job_repo_url: A url pointing to the devserver where the autotest
432 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700433 @param tag: The tag from the server job, in the format
434 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700435
436 @raises DevServerException: If we could not resolve a devserver.
437 @raises AutoservError: If we're unable to save the new job_repo_url as
438 a result of choosing a new devserver because the old one failed to
439 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700440 @raises urllib2.URLError: If the devserver embedded in job_repo_url
441 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700442 """
443 job_repo_url = self.lookup_job_repo_url()
444 if not job_repo_url:
445 logging.warning('No job repo url set on host %s', self.hostname)
446 return
447
448 logging.info('Verifying job repo url %s', job_repo_url)
449 devserver_url, image_name = tools.get_devserver_build_from_package_url(
450 job_repo_url)
451
beeps0c865032013-07-30 11:37:06 -0700452 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700453
454 logging.info('Staging autotest artifacts for %s on devserver %s',
455 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700456
457 start_time = time.time()
Simran Basi25e7a922014-10-31 11:56:10 -0700458 ds.stage_artifacts(image_name, ['autotest_packages'])
beeps687243d2013-07-18 15:29:27 -0700459 stage_time = time.time() - start_time
460
461 # Record how much of the verification time comes from a devserver
462 # restage. If we're doing things right we should not see multiple
463 # devservers for a given board/build/branch path.
464 try:
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800465 board, build_type, branch = server_utils.ParseBuildName(
beeps687243d2013-07-18 15:29:27 -0700466 image_name)[:3]
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800467 except server_utils.ParseBuildNameException:
beeps687243d2013-07-18 15:29:27 -0700468 pass
469 else:
beeps0c865032013-07-30 11:37:06 -0700470 devserver = devserver_url[
Chris Sosa65425082013-10-16 13:26:22 -0700471 devserver_url.find('/') + 2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700472 stats_key = {
473 'board': board,
474 'build_type': build_type,
475 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700476 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700477 }
478 stats.Gauge('verify_job_repo_url').send(
479 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
480 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700481
Scott Zawalskieadbf702013-03-14 09:23:06 -0400482
Dan Shi0f466e82013-02-22 15:44:58 -0800483 def _try_stateful_update(self, update_url, force_update, updater):
484 """Try to use stateful update to initialize DUT.
485
486 When DUT is already running the same version that machine_install
487 tries to install, stateful update is a much faster way to clean up
488 the DUT for testing, compared to a full reimage. It is implemeted
489 by calling autoupdater.run_update, but skipping updating root, as
490 updating the kernel is time consuming and not necessary.
491
492 @param update_url: url of the image.
493 @param force_update: Set to True to update the image even if the DUT
494 is running the same version.
495 @param updater: ChromiumOSUpdater instance used to update the DUT.
496 @returns: True if the DUT was updated with stateful update.
497
498 """
J. Richard Barnette3f731032014-04-07 17:42:59 -0700499 # TODO(jrbarnette): Yes, I hate this re.match() test case.
500 # It's better than the alternative: see crbug.com/360944.
501 image_name = autoupdater.url_to_image_name(update_url)
502 release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
503 if not re.match(release_pattern, image_name):
504 return False
Dan Shi0f466e82013-02-22 15:44:58 -0800505 if not updater.check_version():
506 return False
507 if not force_update:
508 logging.info('Canceling stateful update because the new and '
509 'old versions are the same.')
510 return False
511 # Following folders should be rebuilt after stateful update.
512 # A test file is used to confirm each folder gets rebuilt after
513 # the stateful update.
514 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
515 test_file = '.test_file_to_be_deleted'
516 for folder in folders_to_check:
517 touch_path = os.path.join(folder, test_file)
518 self.run('touch %s' % touch_path)
519
520 if not updater.run_update(force_update=True, update_root=False):
521 return False
522
523 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700524 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800525 check_file_cmd = 'test -f %s; echo $?'
526 for folder in folders_to_check:
527 test_file_path = os.path.join(folder, test_file)
528 result = self.run(check_file_cmd % test_file_path,
529 ignore_status=True)
530 if result.exit_status == 1:
531 return False
532 return True
533
534
J. Richard Barnette7275b612013-06-04 18:13:11 -0700535 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800536 """After the DUT is updated, confirm machine_install succeeded.
537
538 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700539 @param expected_kernel: kernel expected to be active after reboot,
540 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800541
542 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700543 # Touch the lab machine file to leave a marker that
544 # distinguishes this image from other test images.
545 # Afterwards, we must re-run the autoreboot script because
546 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800547 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800548 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700549 updater.verify_boot_expectations(
550 expected_kernel, rollback_message=
551 'Build %s failed to boot on %s; system rolled back to previous'
552 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700553 # Check that we've got the build we meant to install.
554 if not updater.check_version_to_confirm_install():
555 raise autoupdater.ChromiumOSError(
556 'Failed to update %s to build %s; found build '
557 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700558 updater.update_version,
559 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800560
561
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700562 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400563 """Stage a build on a devserver and return the update_url.
564
565 @param image_name: a name like lumpy-release/R27-3837.0.0
566 @returns an update URL like:
567 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
568 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700569 if not image_name:
570 image_name = self.get_repair_image_name()
571 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400572 devserver = dev_server.ImageServer.resolve(image_name)
573 devserver.trigger_download(image_name, synchronous=False)
574 return tools.image_url_pattern() % (devserver.url(), image_name)
575
576
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700577 def stage_image_for_servo(self, image_name=None):
578 """Stage a build on a devserver and return the update_url.
579
580 @param image_name: a name like lumpy-release/R27-3837.0.0
581 @returns an update URL like:
582 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
583 """
584 if not image_name:
585 image_name = self.get_repair_image_name()
586 logging.info('Staging build for servo install: %s', image_name)
587 devserver = dev_server.ImageServer.resolve(image_name)
588 devserver.stage_artifacts(image_name, ['test_image'])
589 return devserver.get_test_image_url(image_name)
590
591
beepse539be02013-07-31 21:57:39 -0700592 def stage_factory_image_for_servo(self, image_name):
593 """Stage a build on a devserver and return the update_url.
594
595 @param image_name: a name like <baord>/4262.204.0
beeps12c0a3c2013-09-03 11:58:27 -0700596
beepse539be02013-07-31 21:57:39 -0700597 @return: An update URL, eg:
598 http://<devserver>/static/canary-channel/\
599 <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
beeps12c0a3c2013-09-03 11:58:27 -0700600
601 @raises: ValueError if the factory artifact name is missing from
602 the config.
603
beepse539be02013-07-31 21:57:39 -0700604 """
605 if not image_name:
606 logging.error('Need an image_name to stage a factory image.')
607 return
608
beeps12c0a3c2013-09-03 11:58:27 -0700609 factory_artifact = global_config.global_config.get_config_value(
610 'CROS', 'factory_artifact', type=str, default='')
611 if not factory_artifact:
612 raise ValueError('Cannot retrieve the factory artifact name from '
613 'autotest config, and hence cannot stage factory '
614 'artifacts.')
615
beepse539be02013-07-31 21:57:39 -0700616 logging.info('Staging build for servo install: %s', image_name)
617 devserver = dev_server.ImageServer.resolve(image_name)
618 devserver.stage_artifacts(
619 image_name,
beeps12c0a3c2013-09-03 11:58:27 -0700620 [factory_artifact],
621 archive_url=None)
beepse539be02013-07-31 21:57:39 -0700622
623 return tools.factory_image_url_pattern() % (devserver.url(), image_name)
624
625
Chris Sosaa3ac2152012-05-23 22:23:13 -0700626 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500627 local_devserver=False, repair=False):
628 """Install the DUT.
629
Dan Shi0f466e82013-02-22 15:44:58 -0800630 Use stateful update if the DUT is already running the same build.
631 Stateful update does not update kernel and tends to run much faster
632 than a full reimage. If the DUT is running a different build, or it
633 failed to do a stateful update, full update, including kernel update,
634 will be applied to the DUT.
635
Scott Zawalskieadbf702013-03-14 09:23:06 -0400636 Once a host enters machine_install its cros_version label will be
637 removed as well as its host attribute job_repo_url (used for
638 package install).
639
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500640 @param update_url: The url to use for the update
641 pattern: http://$devserver:###/update/$build
642 If update_url is None and repair is True we will install the
Dan Shi6964fa52014-12-18 11:04:27 -0800643 stable image listed in afe_stable_versions table. If the table
644 is not setup, global_config value under CROS.stable_cros_version
645 will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500646 @param force_update: Force an update even if the version installed
647 is the same. Default:False
648 @param local_devserver: Used by run_remote_test to allow people to
649 use their local devserver. Default: False
650 @param repair: Whether or not we are in repair mode. This adds special
651 cases for repairing a machine like starting update_engine.
652 Setting repair to True sets force_update to True as well.
653 default: False
654 @raises autoupdater.ChromiumOSError
655
656 """
Dan Shi7458bf62013-06-10 12:50:16 -0700657 if update_url:
658 logging.debug('update url is set to %s', update_url)
659 else:
660 logging.debug('update url is not set, resolving...')
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700661 if self._parser.options.image:
662 requested_build = self._parser.options.image
663 if requested_build.startswith('http://'):
664 update_url = requested_build
Dan Shi7458bf62013-06-10 12:50:16 -0700665 logging.debug('update url is retrieved from requested_build'
666 ': %s', update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700667 else:
668 # Try to stage any build that does not start with
669 # http:// on the devservers defined in
670 # global_config.ini.
Dan Shi7458bf62013-06-10 12:50:16 -0700671 update_url = self._stage_image_for_update(requested_build)
672 logging.debug('Build staged, and update_url is set to: %s',
673 update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700674 elif repair:
675 update_url = self._stage_image_for_update()
Dan Shi7458bf62013-06-10 12:50:16 -0700676 logging.debug('Build staged, and update_url is set to: %s',
677 update_url)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400678 else:
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700679 raise autoupdater.ChromiumOSError(
680 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500681
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500682 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800683 # In case the system is in a bad state, we always reboot the machine
684 # before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700685 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500686 self.run('stop update-engine; start update-engine')
687 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800688
Chris Sosaa3ac2152012-05-23 22:23:13 -0700689 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Chris Sosa72312602013-04-16 15:01:56 -0700690 local_devserver=local_devserver)
Dan Shi0f466e82013-02-22 15:44:58 -0800691 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400692 # Remove cros-version and job_repo_url host attribute from host.
693 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800694 # If the DUT is already running the same build, try stateful update
695 # first. Stateful update does not update kernel and tends to run much
696 # faster than a full reimage.
697 try:
Chris Sosab76e0ee2013-05-22 16:55:41 -0700698 updated = self._try_stateful_update(
699 update_url, force_update, updater)
Dan Shi0f466e82013-02-22 15:44:58 -0800700 if updated:
701 logging.info('DUT is updated with stateful update.')
702 except Exception as e:
703 logging.exception(e)
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -0700704 logging.warning('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700705
Dan Shi0f466e82013-02-22 15:44:58 -0800706 inactive_kernel = None
707 # Do a full update if stateful update is not applicable or failed.
708 if not updated:
Chris Sosab7612bc2013-03-21 10:32:37 -0700709 # TODO(sosa): Remove temporary hack to get rid of bricked machines
710 # that can't update due to a corrupted policy.
711 self.run('rm -rf /var/lib/whitelist')
Danny Chanad490bd2014-11-04 14:15:57 -0800712 self.run('mkdir /var/lib/whitelist')
Chris Sosab7612bc2013-03-21 10:32:37 -0700713 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400714 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700715
Dan Shi0f466e82013-02-22 15:44:58 -0800716 if updater.run_update(force_update):
717 updated = True
718 # Figure out active and inactive kernel.
719 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700720
Dan Shi0f466e82013-02-22 15:44:58 -0800721 # Ensure inactive kernel has higher priority than active.
722 if (updater.get_kernel_priority(inactive_kernel)
723 < updater.get_kernel_priority(active_kernel)):
724 raise autoupdater.ChromiumOSError(
725 'Update failed. The priority of the inactive kernel'
726 ' partition is less than that of the active kernel'
727 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700728
Dan Shi0f466e82013-02-22 15:44:58 -0800729 # Updater has returned successfully; reboot the host.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700730 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi5699ac22014-12-19 10:55:49 -0800731
732 if updated:
Dan Shi0f466e82013-02-22 15:44:58 -0800733 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400734 image_name = autoupdater.url_to_image_name(update_url)
Dan Shie9309262013-06-19 22:50:21 -0700735 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800736
Simran Basiae08c8c2014-09-02 11:17:26 -0700737 logging.debug('Cleaning up old autotest directories.')
738 try:
739 installed_autodir = autotest.Autotest.get_installed_autodir(self)
740 self.run('rm -rf ' + installed_autodir)
741 except autotest.AutodirNotFoundError:
742 logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700743
744
Dan Shi9cb0eec2014-06-03 09:04:50 -0700745 def _clear_fw_version_labels(self):
746 """Clear firmware version labels from the machine."""
747 labels = self._AFE.get_labels(
748 name__startswith=provision.FW_VERSION_PREFIX,
749 host__hostname=self.hostname)
750 for label in labels:
751 label.remove_hosts(hosts=[self.hostname])
752
753
754 def _add_fw_version_label(self, build):
755 """Add firmware version label to the machine.
756
757 @param build: Build of firmware.
758
759 """
760 fw_label = provision.fw_version_to_label(build)
761 provision.ensure_label_exists(fw_label)
762 label = self._AFE.get_labels(name__startswith=fw_label)[0]
763 label.add_hosts([self.hostname])
764
765
766 def firmware_install(self, build=None):
767 """Install firmware to the DUT.
768
769 Use stateful update if the DUT is already running the same build.
770 Stateful update does not update kernel and tends to run much faster
771 than a full reimage. If the DUT is running a different build, or it
772 failed to do a stateful update, full update, including kernel update,
773 will be applied to the DUT.
774
775 Once a host enters firmware_install its fw_version label will be
776 removed. After the firmware is updated successfully, a new fw_version
777 label will be added to the host.
778
779 @param build: The build version to which we want to provision the
780 firmware of the machine,
781 e.g. 'link-firmware/R22-2695.1.144'.
782
783 TODO(dshi): After bug 381718 is fixed, update here with corresponding
784 exceptions that could be raised.
785
786 """
787 if not self.servo:
788 raise error.TestError('Host %s does not have servo.' %
789 self.hostname)
790
791 # TODO(fdeng): use host.get_board() after
792 # crbug.com/271834 is fixed.
793 board = self._get_board_from_afe()
794
795 # If build is not set, assume it's repair mode and try to install
796 # firmware from stable CrOS.
797 if not build:
798 build = self.get_repair_image_name()
799
800 config = FAFTConfig(board)
801 if config.use_u_boot:
802 ap_image = 'image-%s.bin' % board
803 else: # Depthcharge platform
804 ap_image = 'image.bin'
805 ec_image = 'ec.bin'
806 ds = dev_server.ImageServer.resolve(build)
807 ds.stage_artifacts(build, ['firmware'])
808
809 tmpd = autotemp.tempdir(unique_id='fwimage')
810 try:
811 fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
812 local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
813 server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
814 timeout=60)
815 server_utils.system('tar xf %s -C %s %s %s' %
816 (local_tarball, tmpd.name, ap_image, ec_image),
817 timeout=60)
818 server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
819 (local_tarball, tmpd.name),
820 timeout=60, ignore_status=True)
821
822 self._clear_fw_version_labels()
823 logging.info('Will re-program EC now')
824 self.servo.program_ec(os.path.join(tmpd.name, ec_image))
825 logging.info('Will re-program BIOS now')
826 self.servo.program_bios(os.path.join(tmpd.name, ap_image))
827 self.servo.get_power_state_controller().reset()
828 time.sleep(self.servo.BOOT_DELAY)
829 self._add_fw_version_label()
830 finally:
831 tmpd.clean()
832
833
Dan Shi10e992b2013-08-30 11:02:59 -0700834 def show_update_engine_log(self):
835 """Output update engine log."""
MK Ryu35d661e2014-09-25 17:44:10 -0700836 logging.debug('Dumping %s', client_constants.UPDATE_ENGINE_LOG)
837 self.run('cat %s' % client_constants.UPDATE_ENGINE_LOG)
Dan Shi10e992b2013-08-30 11:02:59 -0700838
839
Richard Barnette82c35912012-11-20 10:09:10 -0800840 def _get_board_from_afe(self):
841 """Retrieve this host's board from its labels in the AFE.
842
843 Looks for a host label of the form "board:<board>", and
844 returns the "<board>" part of the label. `None` is returned
845 if there is not a single, unique label matching the pattern.
846
847 @returns board from label, or `None`.
848 """
Dan Shia1ecd5c2013-06-06 11:21:31 -0700849 return server_utils.get_board_from_afe(self.hostname, self._AFE)
Simran Basi833814b2013-01-29 13:13:43 -0800850
851
852 def get_build(self):
853 """Retrieve the current build for this Host from the AFE.
854
855 Looks through this host's labels in the AFE to determine its build.
856
857 @returns The current build or None if it could not find it or if there
858 were multiple build labels assigned to this host.
859 """
Dan Shia1ecd5c2013-06-06 11:21:31 -0700860 return server_utils.get_build_from_afe(self.hostname, self._AFE)
Richard Barnette82c35912012-11-20 10:09:10 -0800861
862
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500863 def _install_repair(self):
864 """Attempt to repair this host using upate-engine.
865
866 If the host is up, try installing the DUT with a stable
Dan Shi6964fa52014-12-18 11:04:27 -0800867 "repair" version of Chrome OS as defined in afe_stable_versions table.
868 If the table is not setup, global_config value under
869 CROS.stable_cros_version will be used instead.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500870
Scott Zawalski62bacae2013-03-05 10:40:32 -0500871 @raises AutoservRepairMethodNA if the DUT is not reachable.
872 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500873
874 """
875 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500876 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500877 logging.info('Attempting to reimage machine to repair image.')
878 try:
879 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700880 except autoupdater.ChromiumOSError as e:
881 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500882 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500883 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500884
885
Dan Shi2c88eed2013-11-12 10:18:38 -0800886 def _install_repair_with_powerwash(self):
Dan Shi9cc48452013-11-12 12:39:26 -0800887 """Attempt to powerwash first then repair this host using update-engine.
Dan Shi2c88eed2013-11-12 10:18:38 -0800888
Dan Shi9cc48452013-11-12 12:39:26 -0800889 update-engine may fail due to a bad image. In such case, powerwash
890 may help to cleanup the DUT for update-engine to work again.
Dan Shi2c88eed2013-11-12 10:18:38 -0800891
892 @raises AutoservRepairMethodNA if the DUT is not reachable.
893 @raises ChromiumOSError if the install failed for some reason.
894
895 """
896 if not self.is_up():
897 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
898
899 logging.info('Attempting to powerwash the DUT.')
900 self.run('echo "fast safe" > '
901 '/mnt/stateful_partition/factory_install_reset')
902 self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)
903 if not self.is_up():
Dan Shi9cc48452013-11-12 12:39:26 -0800904 logging.error('Powerwash failed. DUT did not come back after '
Dan Shi2c88eed2013-11-12 10:18:38 -0800905 'reboot.')
906 raise error.AutoservRepairFailure(
907 'DUT failed to boot from powerwash after %d seconds' %
908 self.POWERWASH_BOOT_TIMEOUT)
909
910 logging.info('Powerwash succeeded.')
911 self._install_repair()
912
913
beepsf079cfb2013-09-18 17:49:51 -0700914 def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
915 install_timeout=INSTALL_TIMEOUT):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500916 """
917 Re-install the OS on the DUT by:
918 1) installing a test image on a USB storage device attached to the Servo
919 board,
Richard Barnette03a0c132012-11-05 12:40:35 -0800920 2) booting that image in recovery mode, and then
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700921 3) installing the image with chromeos-install.
922
Scott Zawalski62bacae2013-03-05 10:40:32 -0500923 @param image_url: If specified use as the url to install on the DUT.
924 otherwise boot the currently staged image on the USB stick.
beepsf079cfb2013-09-18 17:49:51 -0700925 @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
926 Factory images need a longer usb_boot_timeout than regular
927 cros images.
928 @param install_timeout: The timeout to use when installing the chromeos
929 image. Factory images need a longer install_timeout.
Richard Barnette03a0c132012-11-05 12:40:35 -0800930
Scott Zawalski62bacae2013-03-05 10:40:32 -0500931 @raises AutoservError if the image fails to boot.
beepsf079cfb2013-09-18 17:49:51 -0700932
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800933 """
beepsf079cfb2013-09-18 17:49:51 -0700934 usb_boot_timer_key = ('servo_install.usb_boot_timeout_%s'
935 % usb_boot_timeout)
936 logging.info('Downloading image to USB, then booting from it. Usb boot '
937 'timeout = %s', usb_boot_timeout)
938 timer = stats.Timer(usb_boot_timer_key)
939 timer.start()
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700940 self.servo.install_recovery_image(image_url)
beepsf079cfb2013-09-18 17:49:51 -0700941 if not self.wait_up(timeout=usb_boot_timeout):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500942 raise error.AutoservRepairFailure(
943 'DUT failed to boot from USB after %d seconds' %
beepsf079cfb2013-09-18 17:49:51 -0700944 usb_boot_timeout)
945 timer.stop()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500946
beepsf079cfb2013-09-18 17:49:51 -0700947 install_timer_key = ('servo_install.install_timeout_%s'
948 % install_timeout)
949 timer = stats.Timer(install_timer_key)
950 timer.start()
951 logging.info('Installing image through chromeos-install.')
MK Ryu35d661e2014-09-25 17:44:10 -0700952 self.run('chromeos-install --yes --lab_preserve_logs=%s' %
953 self._LOGS_TO_COLLECT_FILE,
954 timeout=install_timeout)
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800955 self.run('halt')
beepsf079cfb2013-09-18 17:49:51 -0700956 timer.stop()
957
958 logging.info('Power cycling DUT through servo.')
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800959 self.servo.get_power_state_controller().power_off()
Fang Dengafb88142013-05-30 17:44:31 -0700960 self.servo.switch_usbkey('off')
J. Richard Barnette0199cc82014-12-05 17:08:40 -0800961 # N.B. The Servo API requires that we use power_on() here
962 # for two reasons:
963 # 1) After turning on a DUT in recovery mode, you must turn
964 # it off and then on with power_on() once more to
965 # disable recovery mode (this is a Parrot specific
966 # requirement).
967 # 2) After power_off(), the only way to turn on is with
968 # power_on() (this is a Storm specific requirement).
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700969 self.servo.get_power_state_controller().power_on()
beepsf079cfb2013-09-18 17:49:51 -0700970
971 logging.info('Waiting for DUT to come back up.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800972 if not self.wait_up(timeout=self.BOOT_TIMEOUT):
973 raise error.AutoservError('DUT failed to reboot installed '
974 'test image after %d seconds' %
Scott Zawalski62bacae2013-03-05 10:40:32 -0500975 self.BOOT_TIMEOUT)
976
977
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700978 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500979 """Reinstall the DUT utilizing servo and a test image.
980
981 Re-install the OS on the DUT by:
982 1) installing a test image on a USB storage device attached to the Servo
983 board,
984 2) booting that image in recovery mode, and then
985 3) installing the image with chromeos-install.
986
Scott Zawalski62bacae2013-03-05 10:40:32 -0500987 @raises AutoservRepairMethodNA if the device does not have servo
988 support.
989
990 """
991 if not self.servo:
992 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
993 'DUT has no servo support.')
994
995 logging.info('Attempting to recovery servo enabled device with '
996 'servo_repair_reinstall')
997
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700998 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500999 self.servo_install(image_url)
1000
1001
1002 def _servo_repair_power(self):
1003 """Attempt to repair DUT using an attached Servo.
1004
1005 Attempt to power on the DUT via power_long_press.
1006
1007 @raises AutoservRepairMethodNA if the device does not have servo
1008 support.
1009 @raises AutoservRepairFailure if the repair fails for any reason.
1010 """
1011 if not self.servo:
1012 raise error.AutoservRepairMethodNA('Repair Power NA: '
1013 'DUT has no servo support.')
1014
1015 logging.info('Attempting to recover servo enabled device by '
1016 'powering it off and on.')
1017 self.servo.get_power_state_controller().power_off()
1018 self.servo.get_power_state_controller().power_on()
1019 if self.wait_up(self.BOOT_TIMEOUT):
1020 return
1021
1022 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001023
1024
Richard Barnette82c35912012-11-20 10:09:10 -08001025 def _powercycle_to_repair(self):
1026 """Utilize the RPM Infrastructure to bring the host back up.
1027
1028 If the host is not up/repaired after the first powercycle we utilize
1029 auto fallback to the last good install by powercycling and rebooting the
1030 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001031
1032 @raises AutoservRepairMethodNA if the device does not support remote
1033 power.
1034 @raises AutoservRepairFailure if the repair fails for any reason.
1035
Richard Barnette82c35912012-11-20 10:09:10 -08001036 """
Scott Zawalski62bacae2013-03-05 10:40:32 -05001037 if not self.has_power():
1038 raise error.AutoservRepairMethodNA('Device does not support power.')
1039
Richard Barnette82c35912012-11-20 10:09:10 -08001040 logging.info('Attempting repair via RPM powercycle.')
1041 failed_cycles = 0
1042 self.power_cycle()
1043 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
1044 failed_cycles += 1
1045 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -05001046 raise error.AutoservRepairFailure(
1047 'Powercycled host %s %d times; device did not come back'
1048 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -08001049 self.power_cycle()
1050 if failed_cycles == 0:
1051 logging.info('Powercycling was successful first time.')
1052 else:
1053 logging.info('Powercycling was successful after %d failures.',
1054 failed_cycles)
1055
1056
MK Ryu35d661e2014-09-25 17:44:10 -07001057 def _reboot_repair(self):
1058 """SSH to this host and reboot."""
1059 if not self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS):
1060 raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')
1061 logging.info('Attempting repair via SSH reboot.')
1062 self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1063
1064
Prashanth B4d8184f2014-05-05 12:22:02 -07001065 def check_device(self):
1066 """Check if a device is ssh-able, and if so, clean and verify it.
1067
1068 @raise AutoservSSHTimeout: If the ssh ping times out.
1069 @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
1070 permissions.
1071 @raise AutoservSshPingHostError: For other AutoservRunErrors during
1072 ssh_ping.
1073 @raises AutoservError: As appropriate, during cleanup and verify.
1074 """
1075 self.ssh_ping()
1076 self.cleanup()
1077 self.verify()
1078
1079
Richard Barnette82c35912012-11-20 10:09:10 -08001080 def repair_full(self):
1081 """Repair a host for repair level NO_PROTECTION.
1082
1083 This overrides the base class function for repair; it does
1084 not call back to the parent class, but instead offers a
1085 simplified implementation based on the capabilities in the
1086 Chrome OS test lab.
1087
Fang Deng5d518f42013-08-02 14:04:32 -07001088 It first verifies and repairs servo if it is a DUT in CrOS
Fang Deng03590af2013-10-07 17:34:20 -07001089 lab and a servo is attached.
Fang Deng5d518f42013-08-02 14:04:32 -07001090
Jakob Juelich82b7d1c2014-09-15 16:10:57 -07001091 This escalates in order through the following procedures and verifies
1092 the status using `self.check_device()` after each of them. This is done
1093 until both the repair and the veryfing step succeed.
1094
MK Ryu35d661e2014-09-25 17:44:10 -07001095 Escalation order of repair procedures from less intrusive to
1096 more intrusive repairs:
1097 1. SSH to the DUT and reboot.
Scott Zawalski62bacae2013-03-05 10:40:32 -05001098 2. If there's a servo for the DUT, try to power the DUT off and
1099 on.
MK Ryu35d661e2014-09-25 17:44:10 -07001100 3. If the DUT can be power-cycled via RPM, try to repair
Richard Barnette82c35912012-11-20 10:09:10 -08001101 by power-cycling.
MK Ryu35d661e2014-09-25 17:44:10 -07001102 4. Try to re-install to a known stable image using
1103 auto-update.
1104 5. If there's a servo for the DUT, try to re-install via
1105 the servo.
Richard Barnette82c35912012-11-20 10:09:10 -08001106
1107 As with the parent method, the last operation performed on
Prashanth B4d8184f2014-05-05 12:22:02 -07001108 the DUT must be to call `self.check_device()`; If that call fails the
1109 exception it raises is passed back to the caller.
J. Richard Barnettefde55fc2013-03-15 17:47:01 -07001110
Scott Zawalski62bacae2013-03-05 10:40:32 -05001111 @raises AutoservRepairTotalFailure if the repair process fails to
1112 fix the DUT.
Fang Deng5d518f42013-08-02 14:04:32 -07001113 @raises ServoHostRepairTotalFailure if the repair process fails to
1114 fix the servo host if one is attached to the DUT.
1115 @raises AutoservSshPermissionDeniedError if it is unable
1116 to ssh to the servo host due to permission error.
1117
Richard Barnette82c35912012-11-20 10:09:10 -08001118 """
Jakob Juelich82b7d1c2014-09-15 16:10:57 -07001119 # Caution: Deleting shards relies on repair to always reboot the DUT.
1120
Dan Shi4d478522014-02-14 13:46:32 -08001121 if self._servo_host and not self.servo:
Fang Deng03590af2013-10-07 17:34:20 -07001122 try:
Dan Shi4d478522014-02-14 13:46:32 -08001123 self._servo_host.repair_full()
Fang Deng03590af2013-10-07 17:34:20 -07001124 except Exception as e:
Fang Deng03590af2013-10-07 17:34:20 -07001125 logging.error('Could not create a healthy servo: %s', e)
Dan Shi4d478522014-02-14 13:46:32 -08001126 self.servo = self._servo_host.get_servo()
Fang Deng5d518f42013-08-02 14:04:32 -07001127
MK Ryu35d661e2014-09-25 17:44:10 -07001128 self.try_collect_crashlogs()
1129
Scott Zawalski62bacae2013-03-05 10:40:32 -05001130 # TODO(scottz): This should use something similar to label_decorator,
1131 # but needs to be populated in order so DUTs are repaired with the
1132 # least amount of effort.
MK Ryu35d661e2014-09-25 17:44:10 -07001133 repair_funcs = [self._reboot_repair,
1134 self._servo_repair_power,
1135 self._powercycle_to_repair,
Dan Shi849a1c42014-03-05 11:10:43 -08001136 self._install_repair,
Dan Shi2c88eed2013-11-12 10:18:38 -08001137 self._install_repair_with_powerwash,
MK Ryu35d661e2014-09-25 17:44:10 -07001138 self._servo_repair_reinstall]
Scott Zawalski62bacae2013-03-05 10:40:32 -05001139 errors = []
Simran Basie6130932013-10-01 14:07:52 -07001140 board = self._get_board_from_afe()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001141 for repair_func in repair_funcs:
1142 try:
1143 repair_func()
MK Ryu35d661e2014-09-25 17:44:10 -07001144 self.try_collect_crashlogs()
Prashanth B4d8184f2014-05-05 12:22:02 -07001145 self.check_device()
Simran Basie6130932013-10-01 14:07:52 -07001146 stats.Counter(
1147 '%s.SUCCEEDED' % repair_func.__name__).increment()
1148 if board:
1149 stats.Counter(
Dan Shib87c3aa2014-02-12 15:40:31 -08001150 '%s.%s.SUCCEEDED' % (repair_func.__name__,
Simran Basie6130932013-10-01 14:07:52 -07001151 board)).increment()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001152 return
Simran Basie6130932013-10-01 14:07:52 -07001153 except error.AutoservRepairMethodNA as e:
1154 stats.Counter(
1155 '%s.RepairNA' % repair_func.__name__).increment()
1156 if board:
1157 stats.Counter(
Dan Shib87c3aa2014-02-12 15:40:31 -08001158 '%s.%s.RepairNA' % (repair_func.__name__,
Simran Basie6130932013-10-01 14:07:52 -07001159 board)).increment()
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -07001160 logging.warning('Repair function NA: %s', e)
Simran Basie6130932013-10-01 14:07:52 -07001161 errors.append(str(e))
Scott Zawalski62bacae2013-03-05 10:40:32 -05001162 except Exception as e:
Simran Basie6130932013-10-01 14:07:52 -07001163 stats.Counter(
1164 '%s.FAILED' % repair_func.__name__).increment()
1165 if board:
1166 stats.Counter(
Dan Shib87c3aa2014-02-12 15:40:31 -08001167 '%s.%s.FAILED' % (repair_func.__name__,
Simran Basie6130932013-10-01 14:07:52 -07001168 board)).increment()
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -07001169 logging.warning('Failed to repair device: %s', e)
Scott Zawalski62bacae2013-03-05 10:40:32 -05001170 errors.append(str(e))
Scott Zawalski89c44dd2013-02-26 09:28:02 -05001171
Simran Basie6130932013-10-01 14:07:52 -07001172 stats.Counter('Full_Repair_Failed').increment()
1173 if board:
1174 stats.Counter(
1175 'Full_Repair_Failed.%s' % board).increment()
Scott Zawalski62bacae2013-03-05 10:40:32 -05001176 raise error.AutoservRepairTotalFailure(
1177 'All attempts at repairing the device failed:\n%s' %
1178 '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001179
1180
MK Ryu35d661e2014-09-25 17:44:10 -07001181 def try_collect_crashlogs(self, check_host_up=True):
1182 """
1183 Check if a host is up and logs need to be collected from the host,
1184 if yes, collect them.
1185
1186 @param check_host_up: Flag for checking host is up. Default is True.
1187 """
1188 try:
1189 crash_job = self._need_crash_logs()
1190 if crash_job:
1191 logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
1192 crash_job)
1193 if not check_host_up or self.is_up(
1194 self._CHECK_HOST_UP_TIMEOUT_SECS):
1195 self._collect_crashlogs(crash_job)
1196 logging.debug('%s: Completed collecting logs for the '
1197 'crashed job %s', self._CRASHLOGS_PREFIX,
1198 crash_job)
1199 except Exception as e:
1200 # Exception should not result in repair failure.
1201 # Therefore, suppress all exceptions here.
1202 logging.error('%s: Failed while trying to collect crash-logs: %s',
1203 self._CRASHLOGS_PREFIX, e)
1204
1205
1206 def _need_crash_logs(self):
1207 """Get the value of need_crash_logs attribute of this host.
1208
1209 @return: Value string of need_crash_logs attribute
1210 None if there is no need_crash_logs attribute
1211 """
1212 attrs = self._AFE.get_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
1213 hostname=self.hostname)
1214 assert len(attrs) < 2
1215 return attrs[0].value if attrs else None
1216
1217
1218 def _collect_crashlogs(self, job_id):
1219 """Grab logs from the host where a job was crashed.
1220
1221 First, check if PRIOR_LOGS_DIR exists in the host.
1222 If yes, collect them.
1223 Otherwise, check if a lab-machine marker (_LAB_MACHINE_FILE) exists
1224 in the host.
1225 If yes, the host was repaired automatically, and we collect normal
1226 system logs.
1227
1228 @param job_id: Id of the job that was crashed.
1229 """
1230 crashlogs_dir = crashcollect.get_crashinfo_dir(self,
1231 constants.CRASHLOGS_DEST_DIR_PREFIX)
1232 flag_prior_logs = False
1233
1234 if self.path_exists(client_constants.PRIOR_LOGS_DIR):
1235 flag_prior_logs = True
1236 self._collect_prior_logs(crashlogs_dir)
1237 elif self.path_exists(self._LAB_MACHINE_FILE):
1238 self._collect_system_logs(crashlogs_dir)
1239 else:
1240 logging.warning('%s: Host was manually re-installed without '
1241 '--lab_preserve_log option. Skip collecting '
1242 'crash-logs.', self._CRASHLOGS_PREFIX)
1243
1244 # We make crash collection be one-time effort.
1245 # _collect_prior_logs() and _collect_system_logs() will not throw
1246 # any exception, and following codes will be executed even when
1247 # those methods fail.
1248 # _collect_crashlogs() is called only when the host is up (refer
1249 # to try_collect_crashlogs()). We assume _collect_prior_logs() and
1250 # _collect_system_logs() fail rarely when the host is up.
1251 # In addition, it is not clear how many times we should try crash
1252 # collection again while not triggering next repair unnecessarily.
1253 # Threfore, we try crash collection one time.
1254
1255 # Create a marker file as soon as log collection is done.
1256 # Leave the job id to this marker for gs_offloader to consume.
1257 marker_file = os.path.join(crashlogs_dir, constants.CRASHLOGS_MARKER)
1258 with open(marker_file, 'a') as f:
1259 f.write('%s\n' % job_id)
1260
1261 # Remove need_crash_logs attribute
1262 logging.debug('%s: Remove attribute need_crash_logs from host %s',
1263 self._CRASHLOGS_PREFIX, self.hostname)
1264 self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
1265 None, hostname=self.hostname)
1266
1267 if flag_prior_logs:
1268 logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
1269 client_constants.PRIOR_LOGS_DIR, self.hostname)
1270 self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
1271 # Wait for a few seconds to make sure the prior command is
1272 # done deep through storage.
1273 time.sleep(self._SAFE_WAIT_SECS)
1274
1275
1276 def _collect_prior_logs(self, crashlogs_dir):
1277 """Grab prior logs that were stashed before re-installing a host.
1278
1279 @param crashlogs_dir: Directory path where crash-logs are stored.
1280 """
1281 logging.debug('%s: Found %s, collecting them...',
1282 self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR)
1283 try:
1284 self.collect_logs(client_constants.PRIOR_LOGS_DIR,
1285 crashlogs_dir, False)
1286 logging.debug('%s: %s is collected',
1287 self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR)
1288 except Exception as e:
1289 logging.error('%s: Failed to collect %s: %s',
1290 self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR,
1291 e)
1292
1293
1294 def _collect_system_logs(self, crashlogs_dir):
1295 """Grab normal system logs from a host.
1296
1297 @param crashlogs_dir: Directory path where crash-logs are stored.
1298 """
1299 logging.debug('%s: Found %s, collecting system logs...',
1300 self._CRASHLOGS_PREFIX, self._LAB_MACHINE_FILE)
1301 sources = server_utils.parse_simple_config(self._LOGS_TO_COLLECT_FILE)
1302 for src in sources:
1303 try:
1304 if self.path_exists(src):
1305 logging.debug('%s: Collecting %s...',
1306 self._CRASHLOGS_PREFIX, src)
1307 dest = server_utils.concat_path_except_last(
1308 crashlogs_dir, src)
1309 self.collect_logs(src, dest, False)
1310 logging.debug('%s: %s is collected',
1311 self._CRASHLOGS_PREFIX, src)
1312 except Exception as e:
1313 logging.error('%s: Failed to collect %s: %s',
1314 self._CRASHLOGS_PREFIX, src, e)
1315
1316
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001317 def close(self):
beeps32a63082013-08-22 14:02:29 -07001318 self.rpc_disconnect_all()
Fang Deng0ca40e22013-08-27 17:47:44 -07001319 super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001320
1321
Dan Shi49ca0932014-11-14 11:22:27 -08001322 def get_power_supply_info(self):
1323 """Get the output of power_supply_info.
1324
1325 power_supply_info outputs the info of each power supply, e.g.,
1326 Device: Line Power
1327 online: no
1328 type: Mains
1329 voltage (V): 0
1330 current (A): 0
1331 Device: Battery
1332 state: Discharging
1333 percentage: 95.9276
1334 technology: Li-ion
1335
1336 Above output shows two devices, Line Power and Battery, with details of
1337 each device listed. This function parses the output into a dictionary,
1338 with key being the device name, and value being a dictionary of details
1339 of the device info.
1340
1341 @return: The dictionary of power_supply_info, e.g.,
1342 {'Line Power': {'online': 'yes', 'type': 'main'},
1343 'Battery': {'vendor': 'xyz', 'percentage': '100'}}
1344 """
1345 result = self.run('power_supply_info').stdout.strip()
1346 info = {}
1347 device_name = None
1348 device_info = {}
1349 for line in result.split('\n'):
1350 pair = [v.strip() for v in line.split(':')]
1351 if len(pair) != 2:
1352 continue
1353 if pair[0] == 'Device':
1354 if device_name:
1355 info[device_name] = device_info
1356 device_name = pair[1]
1357 device_info = {}
1358 else:
1359 device_info[pair[0]] = pair[1]
1360 if device_name and not device_name in info:
1361 info[device_name] = device_info
1362 return info
1363
1364
1365 def get_battery_percentage(self):
1366 """Get the battery percentage.
1367
1368 @return: The percentage of battery level, value range from 0-100. Return
1369 None if the battery info cannot be retrieved.
1370 """
1371 try:
1372 info = self.get_power_supply_info()
1373 logging.info(info)
1374 return float(info['Battery']['percentage'])
1375 except KeyError, ValueError:
1376 return None
1377
1378
1379 def is_ac_connected(self):
1380 """Check if the dut has power adapter connected and charging.
1381
1382 @return: True if power adapter is connected and charging.
1383 """
1384 try:
1385 info = self.get_power_supply_info()
1386 return info['Line Power']['online'] == 'yes'
1387 except KeyError:
1388 return False
1389
1390
Simran Basi5e6339a2013-03-21 11:34:32 -07001391 def _cleanup_poweron(self):
1392 """Special cleanup method to make sure hosts always get power back."""
1393 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1394 hosts = afe.get_hosts(hostname=self.hostname)
1395 if not hosts or not (self._RPM_OUTLET_CHANGED in
1396 hosts[0].attributes):
1397 return
1398 logging.debug('This host has recently interacted with the RPM'
1399 ' Infrastructure. Ensuring power is on.')
1400 try:
1401 self.power_on()
Dan Shi7dca56e2014-11-11 17:07:56 -08001402 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
1403 hostname=self.hostname)
Simran Basi5e6339a2013-03-21 11:34:32 -07001404 except rpm_client.RemotePowerException:
Simran Basi5e6339a2013-03-21 11:34:32 -07001405 logging.error('Failed to turn Power On for this host after '
1406 'cleanup through the RPM Infrastructure.')
Dan Shi7dca56e2014-11-11 17:07:56 -08001407 es_utils.ESMetadata().post(
1408 type_str='RPM_poweron_failure',
1409 metadata={'hostname': self.hostname})
Dan Shi49ca0932014-11-14 11:22:27 -08001410
1411 battery_percentage = self.get_battery_percentage()
Dan Shif01ebe22014-12-05 13:10:57 -08001412 if battery_percentage and battery_percentage < 50:
Dan Shi49ca0932014-11-14 11:22:27 -08001413 raise
1414 elif self.is_ac_connected():
1415 logging.info('The device has power adapter connected and '
1416 'charging. No need to try to turn RPM on '
1417 'again.')
1418 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
1419 hostname=self.hostname)
1420 logging.info('Battery level is now at %s%%. The device may '
1421 'still have enough power to run test, so no '
1422 'exception will be raised.', battery_percentage)
1423
Simran Basi5e6339a2013-03-21 11:34:32 -07001424
beepsc87ff602013-07-31 21:53:00 -07001425 def _is_factory_image(self):
1426 """Checks if the image on the DUT is a factory image.
1427
1428 @return: True if the image on the DUT is a factory image.
1429 False otherwise.
1430 """
1431 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1432 return result.exit_status == 0
1433
1434
1435 def _restart_ui(self):
J. Richard Barnette84890bd2014-02-21 11:05:47 -08001436 """Restart the Chrome UI.
beepsc87ff602013-07-31 21:53:00 -07001437
1438 @raises: FactoryImageCheckerException for factory images, since
1439 we cannot attempt to restart ui on them.
1440 error.AutoservRunError for any other type of error that
1441 occurs while restarting ui.
1442 """
1443 if self._is_factory_image():
1444 raise FactoryImageCheckerException('Cannot restart ui on factory '
1445 'images')
1446
J. Richard Barnette84890bd2014-02-21 11:05:47 -08001447 # TODO(jrbarnette): The command to stop/start the ui job
1448 # should live inside cros_ui, too. However that would seem
1449 # to imply interface changes to the existing start()/restart()
1450 # functions, which is a bridge too far (for now).
1451 prompt = cros_ui.get_login_prompt_state(self)
1452 self.run('stop ui; start ui')
1453 cros_ui.wait_for_chrome_ready(prompt, self)
beepsc87ff602013-07-31 21:53:00 -07001454
1455
1456 def cleanup(self):
MK Ryu35d661e2014-09-25 17:44:10 -07001457 self.run('rm -f %s' % client_constants.CLEANUP_LOGS_PAUSED_FILE)
Scott Zawalskiddbc31e2012-11-15 11:29:01 -05001458 try:
beepsc87ff602013-07-31 21:53:00 -07001459 self._restart_ui()
1460 except (error.AutotestRunError, error.AutoservRunError,
1461 FactoryImageCheckerException):
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -07001462 logging.warning('Unable to restart ui, rebooting device.')
Scott Zawalskiddbc31e2012-11-15 11:29:01 -05001463 # Since restarting the UI fails fall back to normal Autotest
1464 # cleanup routines, i.e. reboot the machine.
Fang Deng0ca40e22013-08-27 17:47:44 -07001465 super(CrosHost, self).cleanup()
Simran Basi5e6339a2013-03-21 11:34:32 -07001466 # Check if the rpm outlet was manipulated.
Simran Basid5e5e272012-09-24 15:23:59 -07001467 if self.has_power():
Simran Basi5e6339a2013-03-21 11:34:32 -07001468 self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001469
1470
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001471 def reboot(self, **dargs):
1472 """
1473 This function reboots the site host. The more generic
1474 RemoteHost.reboot() performs sync and sleeps for 5
1475 seconds. This is not necessary for Chrome OS devices as the
1476 sync should be finished in a short time during the reboot
1477 command.
1478 """
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +08001479 if 'reboot_cmd' not in dargs:
Doug Anderson7d5aeb22014-02-27 15:12:17 -08001480 reboot_timeout = dargs.get('reboot_timeout', 10)
1481 dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
1482 ' </dev/null >/dev/null 2>&1 &)' %
1483 reboot_timeout)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001484 # Enable fastsync to avoid running extra sync commands before reboot.
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +08001485 if 'fastsync' not in dargs:
1486 dargs['fastsync'] = True
Michael Liangda8c60a2014-06-03 13:24:51 -07001487
Charlie Mooneya8e6dab2014-05-29 14:37:55 -07001488 # For purposes of logging reboot times:
1489 # Get the board name i.e. 'daisy_spring'
Michael Liangca4f5a62014-07-10 15:45:13 -07001490 board_fullname = self.get_board()
1491
1492 # Strip the prefix and add it to dargs.
1493 dargs['board'] = board_fullname[board_fullname.find(':')+1:]
Fang Deng0ca40e22013-08-27 17:47:44 -07001494 super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001495
1496
Gwendal Grignou7a61d2f2014-05-23 11:05:51 -07001497 def suspend(self, **dargs):
1498 """
1499 This function suspends the site host.
1500 """
1501 suspend_time = dargs.get('suspend_time', 60)
1502 dargs['timeout'] = suspend_time
1503 if 'suspend_cmd' not in dargs:
1504 cmd = ' && '.join(['echo 0 > /sys/class/rtc/rtc0/wakealarm',
1505 'echo +%d > /sys/class/rtc/rtc0/wakealarm' % suspend_time,
1506 'powerd_dbus_suspend --delay=0 &'])
1507 dargs['suspend_cmd'] = ('(( %s )'
1508 '< /dev/null >/dev/null 2>&1 &)' % cmd)
1509 super(CrosHost, self).suspend(**dargs)
1510
1511
Simran Basiec564392014-08-25 16:48:09 -07001512 def upstart_status(self, service_name):
1513 """Check the status of an upstart init script.
1514
1515 @param service_name: Service to look up.
1516
1517 @returns True if the service is running, False otherwise.
1518 """
1519 return self.run('status %s | grep start/running' %
1520 service_name).stdout.strip() != ''
1521
1522
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001523 def verify_software(self):
Richard Barnetteb2bc13c2013-01-08 17:32:51 -08001524 """Verify working software on a Chrome OS system.
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001525
Richard Barnetteb2bc13c2013-01-08 17:32:51 -08001526 Tests for the following conditions:
1527 1. All conditions tested by the parent version of this
1528 function.
1529 2. Sufficient space in /mnt/stateful_partition.
Fang Deng6b05f5b2013-03-20 13:42:11 -07001530 3. Sufficient space in /mnt/stateful_partition/encrypted.
1531 4. update_engine answers a simple status request over DBus.
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001532
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001533 """
MK Ryu35d661e2014-09-25 17:44:10 -07001534 # Check if a job was crashed on this host.
1535 # If yes, avoid verification until crash-logs are collected.
1536 if self._need_crash_logs():
1537 raise error.AutoservCrashLogCollectRequired(
1538 'Need to collect crash-logs before verification')
1539
Fang Deng0ca40e22013-08-27 17:47:44 -07001540 super(CrosHost, self).verify_software()
J. Richard Barnette4164d1d2014-12-02 17:52:33 -08001541 self.check_inodes(
1542 '/mnt/stateful_partition',
1543 global_config.global_config.get_config_value(
1544 'SERVER', 'kilo_inodes_required', type=int,
1545 default=100))
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001546 self.check_diskspace(
1547 '/mnt/stateful_partition',
1548 global_config.global_config.get_config_value(
Fang Deng6b05f5b2013-03-20 13:42:11 -07001549 'SERVER', 'gb_diskspace_required', type=float,
1550 default=20.0))
Gaurav Shahe448af82014-06-19 15:18:59 -07001551 encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
1552 # Not all targets build with encrypted stateful support.
1553 if self.path_exists(encrypted_stateful_path):
1554 self.check_diskspace(
1555 encrypted_stateful_path,
1556 global_config.global_config.get_config_value(
1557 'SERVER', 'gb_encrypted_diskspace_required', type=float,
1558 default=0.1))
beepsc87ff602013-07-31 21:53:00 -07001559
Simran Basiec564392014-08-25 16:48:09 -07001560 if not self.upstart_status('system-services'):
Prashanth B5d0a0512014-04-25 12:26:08 -07001561 raise error.AutoservError('Chrome failed to reach login. '
1562 'System services not running.')
1563
beepsc87ff602013-07-31 21:53:00 -07001564 # Factory images don't run update engine,
1565 # goofy controls dbus on these DUTs.
1566 if not self._is_factory_image():
1567 self.run('update_engine_client --status')
Scott Zawalskifbca4a92013-03-04 15:56:42 -05001568 # Makes sure python is present, loads and can use built in functions.
1569 # We have seen cases where importing cPickle fails with undefined
1570 # symbols in cPickle.so.
1571 self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001572
1573
Dan Shi49ca0932014-11-14 11:22:27 -08001574 def verify_hardware(self):
1575 """Verify hardware system of a Chrome OS system.
1576
1577 Check following hardware conditions:
1578 1. Battery level.
1579 2. Is power adapter connected.
1580 """
1581 logging.info('Battery percentage: %s', self.get_battery_percentage())
1582 logging.info('Device %s power adapter connected and charging.',
1583 'has' if self.is_ac_connected() else 'does not have')
1584
1585
Fang Deng96667ca2013-08-01 17:46:18 -07001586 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
1587 connect_timeout=None, alive_interval=None):
1588 """Override default make_ssh_command to use options tuned for Chrome OS.
1589
1590 Tuning changes:
1591 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
1592 connection failure. Consistency with remote_access.sh.
1593
1594 - ServerAliveInterval=180; which causes SSH to ping connection every
1595 180 seconds. In conjunction with ServerAliveCountMax ensures
1596 that if the connection dies, Autotest will bail out quickly.
1597 Originally tried 60 secs, but saw frequent job ABORTS where
1598 the test completed successfully.
1599
1600 - ServerAliveCountMax=3; consistency with remote_access.sh.
1601
1602 - ConnectAttempts=4; reduce flakiness in connection errors;
1603 consistency with remote_access.sh.
1604
1605 - UserKnownHostsFile=/dev/null; we don't care about the keys.
1606 Host keys change with every new installation, don't waste
1607 memory/space saving them.
1608
1609 - SSH protocol forced to 2; needed for ServerAliveInterval.
1610
1611 @param user User name to use for the ssh connection.
1612 @param port Port on the target host to use for ssh connection.
1613 @param opts Additional options to the ssh command.
1614 @param hosts_file Ignored.
1615 @param connect_timeout Ignored.
1616 @param alive_interval Ignored.
1617 """
Aviv Keshetc5947fa2013-09-04 14:06:29 -07001618 base_command = ('/usr/bin/ssh -a -x %s %s %s'
1619 ' -o StrictHostKeyChecking=no'
Fang Deng96667ca2013-08-01 17:46:18 -07001620 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
1621 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
1622 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
1623 ' -o Protocol=2 -l %s -p %d')
Aviv Keshetc5947fa2013-09-04 14:06:29 -07001624 return base_command % (self._ssh_verbosity_flag, self._ssh_options,
1625 opts, user, port)
Fang Deng96667ca2013-08-01 17:46:18 -07001626
1627
beeps32a63082013-08-22 14:02:29 -07001628 def _create_ssh_tunnel(self, port, local_port):
1629 """Create an ssh tunnel from local_port to port.
1630
1631 @param port: remote port on the host.
1632 @param local_port: local forwarding port.
1633
1634 @return: the tunnel process.
1635 """
1636 # Chrome OS on the target closes down most external ports
1637 # for security. We could open the port, but doing that
1638 # would conflict with security tests that check that only
1639 # expected ports are open. So, to get to the port on the
1640 # target we use an ssh tunnel.
1641 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
1642 ssh_cmd = self.make_ssh_command(opts=tunnel_options)
1643 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
1644 logging.debug('Full tunnel command: %s', tunnel_cmd)
1645 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
1646 logging.debug('Started ssh tunnel, local = %d'
1647 ' remote = %d, pid = %d',
1648 local_port, port, tunnel_proc.pid)
1649 return tunnel_proc
1650
1651
Christopher Wileydd181852013-10-10 19:56:58 -07001652 def _setup_rpc(self, port, command_name, remote_pid=None):
beeps32a63082013-08-22 14:02:29 -07001653 """Sets up a tunnel process and performs rpc connection book keeping.
1654
1655 This method assumes that xmlrpc and jsonrpc never conflict, since
1656 we can only either have an xmlrpc or a jsonrpc server listening on
1657 a remote port. As such, it enforces a single proxy->remote port
1658 policy, i.e if one starts a jsonrpc proxy/server from port A->B,
1659 and then tries to start an xmlrpc proxy forwarded to the same port,
1660 the xmlrpc proxy will override the jsonrpc tunnel process, however:
1661
1662 1. None of the methods on the xmlrpc proxy will work because
1663 the server listening on B is jsonrpc.
1664
1665 2. The xmlrpc client cannot initiate a termination of the JsonRPC
1666 server, as the only use case currently is goofy, which is tied to
1667 the factory image. It is much easier to handle a failed xmlrpc
1668 call on the client than it is to terminate goofy in this scenario,
1669 as doing the latter might leave the DUT in a hard to recover state.
1670
1671 With the current implementation newer rpc proxy connections will
1672 terminate the tunnel processes of older rpc connections tunneling
1673 to the same remote port. If methods are invoked on the client
1674 after this has happened they will fail with connection closed errors.
1675
1676 @param port: The remote forwarding port.
1677 @param command_name: The name of the remote process, to terminate
1678 using pkill.
1679
1680 @return A url that we can use to initiate the rpc connection.
1681 """
1682 self.rpc_disconnect(port)
1683 local_port = utils.get_unused_port()
1684 tunnel_proc = self._create_ssh_tunnel(port, local_port)
Christopher Wileydd181852013-10-10 19:56:58 -07001685 self._rpc_proxy_map[port] = (command_name, tunnel_proc, remote_pid)
beeps32a63082013-08-22 14:02:29 -07001686 return self._RPC_PROXY_URL % local_port
1687
1688
Christopher Wileyd78249a2013-03-01 13:05:31 -08001689 def xmlrpc_connect(self, command, port, command_name=None,
Yusuf Mohsinallyfff89d62013-11-18 16:34:07 -08001690 ready_test_name=None, timeout_seconds=10,
1691 logfile='/dev/null'):
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001692 """Connect to an XMLRPC server on the host.
1693
1694 The `command` argument should be a simple shell command that
1695 starts an XMLRPC server on the given `port`. The command
1696 must not daemonize, and must terminate cleanly on SIGTERM.
1697 The command is started in the background on the host, and a
1698 local XMLRPC client for the server is created and returned
1699 to the caller.
1700
1701 Note that the process of creating an XMLRPC client makes no
1702 attempt to connect to the remote server; the caller is
1703 responsible for determining whether the server is running
1704 correctly, and is ready to serve requests.
1705
Christopher Wileyd78249a2013-03-01 13:05:31 -08001706 Optionally, the caller can pass ready_test_name, a string
1707 containing the name of a method to call on the proxy. This
1708 method should take no parameters and return successfully only
1709 when the server is ready to process client requests. When
1710 ready_test_name is set, xmlrpc_connect will block until the
1711 proxy is ready, and throw a TestError if the server isn't
1712 ready by timeout_seconds.
1713
beeps32a63082013-08-22 14:02:29 -07001714 If a server is already running on the remote port, this
1715 method will kill it and disconnect the tunnel process
1716 associated with the connection before establishing a new one,
1717 by consulting the rpc_proxy_map in rpc_disconnect.
1718
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001719 @param command Shell command to start the server.
1720 @param port Port number on which the server is expected to
1721 be serving.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001722 @param command_name String to use as input to `pkill` to
1723 terminate the XMLRPC server on the host.
Christopher Wileyd78249a2013-03-01 13:05:31 -08001724 @param ready_test_name String containing the name of a
1725 method defined on the XMLRPC server.
1726 @param timeout_seconds Number of seconds to wait
1727 for the server to become 'ready.' Will throw a
1728 TestFail error if server is not ready in time.
Yusuf Mohsinallyfff89d62013-11-18 16:34:07 -08001729 @param logfile Logfile to send output when running
1730 'command' argument.
Yusuf Mohsinally8d19e3c2013-11-21 14:25:45 -08001731
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001732 """
Christopher Wileyc14f06a2013-10-16 13:55:39 -07001733 # Clean up any existing state. If the caller is willing
1734 # to believe their server is down, we ought to clean up
1735 # any tunnels we might have sitting around.
1736 self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001737 # Start the server on the host. Redirection in the command
1738 # below is necessary, because 'ssh' won't terminate until
1739 # background child processes close stdin, stdout, and
1740 # stderr.
Yusuf Mohsinallyfff89d62013-11-18 16:34:07 -08001741 remote_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
Christopher Wileydd181852013-10-10 19:56:58 -07001742 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001743 logging.debug('Started XMLRPC server on host %s, pid = %s',
1744 self.hostname, remote_pid)
1745
Christopher Wileydd181852013-10-10 19:56:58 -07001746 # Tunnel through SSH to be able to reach that remote port.
1747 rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
Christopher Wileyd78249a2013-03-01 13:05:31 -08001748 proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
Christopher Wileydd181852013-10-10 19:56:58 -07001749
Christopher Wileyd78249a2013-03-01 13:05:31 -08001750 if ready_test_name is not None:
J. Richard Barnette13eb7c02013-03-07 12:06:29 -08001751 # retry.retry logs each attempt; calculate delay_sec to
1752 # keep log spam to a dull roar.
Christopher Wiley0ed712b2013-04-09 15:25:12 -07001753 @retry.retry((socket.error,
1754 xmlrpclib.ProtocolError,
1755 httplib.BadStatusLine),
Chris Sosa65425082013-10-16 13:26:22 -07001756 timeout_min=timeout_seconds / 60.0,
1757 delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
Christopher Wileyd78249a2013-03-01 13:05:31 -08001758 def ready_test():
1759 """ Call proxy.ready_test_name(). """
1760 getattr(proxy, ready_test_name)()
1761 successful = False
1762 try:
1763 logging.info('Waiting %d seconds for XMLRPC server '
1764 'to start.', timeout_seconds)
1765 ready_test()
1766 successful = True
Christopher Wileyd78249a2013-03-01 13:05:31 -08001767 finally:
1768 if not successful:
1769 logging.error('Failed to start XMLRPC server.')
beeps32a63082013-08-22 14:02:29 -07001770 self.rpc_disconnect(port)
Christopher Wileyd78249a2013-03-01 13:05:31 -08001771 logging.info('XMLRPC server started successfully.')
1772 return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001773
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001774
Jason Abeleb6f924f2013-11-13 16:01:54 -08001775 def syslog(self, message, tag='autotest'):
1776 """Logs a message to syslog on host.
1777
1778 @param message String message to log into syslog
1779 @param tag String tag prefix for syslog
1780
1781 """
1782 self.run('logger -t "%s" "%s"' % (tag, message))
1783
1784
beeps32a63082013-08-22 14:02:29 -07001785 def jsonrpc_connect(self, port):
1786 """Creates a jsonrpc proxy connection through an ssh tunnel.
1787
1788 This method exists to facilitate communication with goofy (which is
1789 the default system manager on all factory images) and as such, leaves
1790 most of the rpc server sanity checking to the caller. Unlike
1791 xmlrpc_connect, this method does not facilitate the creation of a remote
1792 jsonrpc server, as the only clients of this code are factory tests,
1793 for which the goofy system manager is built in to the image and starts
1794 when the target boots.
1795
1796 One can theoretically create multiple jsonrpc proxies all forwarded
1797 to the same remote port, provided the remote port has an rpc server
1798 listening. However, in doing so we stand the risk of leaking an
1799 existing tunnel process, so we always disconnect any older tunnels
1800 we might have through rpc_disconnect.
1801
1802 @param port: port on the remote host that is serving this proxy.
1803
1804 @return: The client proxy.
1805 """
1806 if not jsonrpclib:
1807 logging.warning('Jsonrpclib could not be imported. Check that '
1808 'site-packages contains jsonrpclib.')
1809 return None
1810
1811 proxy = jsonrpclib.jsonrpc.ServerProxy(self._setup_rpc(port, None))
1812
1813 logging.info('Established a jsonrpc connection through port %s.', port)
1814 return proxy
1815
1816
1817 def rpc_disconnect(self, port):
1818 """Disconnect from an RPC server on the host.
1819
1820 Terminates the remote RPC server previously started for
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001821 the given `port`. Also closes the local ssh tunnel created
1822 for the connection to the host. This function does not
beeps32a63082013-08-22 14:02:29 -07001823 directly alter the state of a previously returned RPC
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001824 client object; however disconnection will cause all
1825 subsequent calls to methods on the object to fail.
1826
1827 This function does nothing if requested to disconnect a port
beeps32a63082013-08-22 14:02:29 -07001828 that was not previously connected via _setup_rpc.
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001829
1830 @param port Port number passed to a previous call to
beeps32a63082013-08-22 14:02:29 -07001831 `_setup_rpc()`.
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001832 """
beeps32a63082013-08-22 14:02:29 -07001833 if port not in self._rpc_proxy_map:
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001834 return
Christopher Wileydd181852013-10-10 19:56:58 -07001835 remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001836 if remote_name:
1837 # We use 'pkill' to find our target process rather than
1838 # a PID, because the host may have rebooted since
1839 # connecting, and we don't want to kill an innocent
1840 # process with the same PID.
1841 #
1842 # 'pkill' helpfully exits with status 1 if no target
1843 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -07001844 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001845 # status.
1846 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
Christopher Wileydd181852013-10-10 19:56:58 -07001847 if remote_pid:
1848 logging.info('Waiting for RPC server "%s" shutdown',
1849 remote_name)
1850 start_time = time.time()
1851 while (time.time() - start_time <
1852 self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
1853 running_processes = self.run(
1854 "pgrep -f '%s'" % remote_name,
1855 ignore_status=True).stdout.split()
1856 if not remote_pid in running_processes:
1857 logging.info('Shut down RPC server.')
1858 break
1859 time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
1860 else:
1861 raise error.TestError('Failed to shutdown RPC server %s' %
1862 remote_name)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001863
1864 if tunnel_proc.poll() is None:
1865 tunnel_proc.terminate()
1866 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
1867 else:
1868 logging.debug('Tunnel pid %d terminated early, status %d',
1869 tunnel_proc.pid, tunnel_proc.returncode)
beeps32a63082013-08-22 14:02:29 -07001870 del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001871
1872
beeps32a63082013-08-22 14:02:29 -07001873 def rpc_disconnect_all(self):
1874 """Disconnect all known RPC proxy ports."""
1875 for port in self._rpc_proxy_map.keys():
1876 self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001877
1878
Ilja H. Friedel1232e8a2014-06-17 21:30:48 -07001879 def poor_mans_rpc(self, fun):
1880 """
1881 Calls a function from client utils on the host and returns a string.
1882
1883 @param fun function in client utils namespace.
1884 @return output string from calling fun.
1885 """
Simran Basi263a9d32014-08-19 11:16:51 -07001886 script = 'cd %s/bin; ' % autotest.Autotest.get_installed_autodir(self)
Ilja H. Friedel1232e8a2014-06-17 21:30:48 -07001887 script += 'python -c "import common; import utils;'
1888 script += 'print utils.%s"' % fun
1889 return script
1890
1891
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -08001892 def _ping_check_status(self, status):
1893 """Ping the host once, and return whether it has a given status.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001894
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -08001895 @param status Check the ping status against this value.
1896 @return True iff `status` and the result of ping are the same
1897 (i.e. both True or both False).
1898
1899 """
1900 ping_val = utils.ping(self.hostname, tries=1, deadline=1)
1901 return not (status ^ (ping_val == 0))
1902
1903 def _ping_wait_for_status(self, status, timeout):
1904 """Wait for the host to have a given status (UP or DOWN).
1905
1906 Status is checked by polling. Polling will not last longer
1907 than the number of seconds in `timeout`. The polling
1908 interval will be long enough that only approximately
1909 _PING_WAIT_COUNT polling cycles will be executed, subject
1910 to a maximum interval of about one minute.
1911
1912 @param status Waiting will stop immediately if `ping` of the
1913 host returns this status.
1914 @param timeout Poll for at most this many seconds.
1915 @return True iff the host status from `ping` matched the
1916 requested status at the time of return.
1917
1918 """
1919 # _ping_check_status() takes about 1 second, hence the
1920 # "- 1" in the formula below.
1921 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1922 end_time = time.time() + timeout
1923 while time.time() <= end_time:
1924 if self._ping_check_status(status):
1925 return True
1926 if poll_interval > 0:
1927 time.sleep(poll_interval)
1928
1929 # The last thing we did was sleep(poll_interval), so it may
1930 # have been too long since the last `ping`. Check one more
1931 # time, just to be sure.
1932 return self._ping_check_status(status)
1933
1934 def ping_wait_up(self, timeout):
1935 """Wait for the host to respond to `ping`.
1936
1937 N.B. This method is not a reliable substitute for
1938 `wait_up()`, because a host that responds to ping will not
1939 necessarily respond to ssh. This method should only be used
1940 if the target DUT can be considered functional even if it
1941 can't be reached via ssh.
1942
1943 @param timeout Minimum time to allow before declaring the
1944 host to be non-responsive.
1945 @return True iff the host answered to ping before the timeout.
1946
1947 """
1948 return self._ping_wait_for_status(self._PING_STATUS_UP, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001949
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001950 def ping_wait_down(self, timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001951 """Wait until the host no longer responds to `ping`.
1952
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -08001953 This function can be used as a slightly faster version of
1954 `wait_down()`, by avoiding potentially long ssh timeouts.
1955
1956 @param timeout Minimum time to allow for the host to become
1957 non-responsive.
1958 @return True iff the host quit answering ping before the
1959 timeout.
1960
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001961 """
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -08001962 return self._ping_wait_for_status(self._PING_STATUS_DOWN, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001963
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08001964 def test_wait_for_sleep(self, sleep_timeout=None):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001965 """Wait for the client to enter low-power sleep mode.
1966
1967 The test for "is asleep" can't distinguish a system that is
1968 powered off; to confirm that the unit was asleep, it is
1969 necessary to force resume, and then call
1970 `test_wait_for_resume()`.
1971
1972 This function is expected to be called from a test as part
1973 of a sequence like the following:
1974
1975 ~~~~~~~~
1976 boot_id = host.get_boot_id()
1977 # trigger sleep on the host
1978 host.test_wait_for_sleep()
1979 # trigger resume on the host
1980 host.test_wait_for_resume(boot_id)
1981 ~~~~~~~~
1982
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08001983 @param sleep_timeout time limit in seconds to allow the host sleep.
1984
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001985 @exception TestFail The host did not go to sleep within
1986 the allowed time.
1987 """
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08001988 if sleep_timeout is None:
1989 sleep_timeout = self.SLEEP_TIMEOUT
1990
1991 if not self.ping_wait_down(timeout=sleep_timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001992 raise error.TestFail(
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08001993 'client failed to sleep after %d seconds' % sleep_timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001994
1995
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08001996 def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001997 """Wait for the client to resume from low-power sleep mode.
1998
1999 The `old_boot_id` parameter should be the value from
2000 `get_boot_id()` obtained prior to entering sleep mode. A
2001 `TestFail` exception is raised if the boot id changes.
2002
2003 See @ref test_wait_for_sleep for more on this function's
2004 usage.
2005
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08002006 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002007 target host went to sleep.
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08002008 @param resume_timeout time limit in seconds to allow the host up.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002009
2010 @exception TestFail The host did not respond within the
2011 allowed time.
2012 @exception TestFail The host responded, but the boot id test
2013 indicated a reboot rather than a sleep
2014 cycle.
2015 """
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08002016 if resume_timeout is None:
2017 resume_timeout = self.RESUME_TIMEOUT
2018
2019 if not self.wait_up(timeout=resume_timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002020 raise error.TestFail(
2021 'client failed to resume from sleep after %d seconds' %
Tom Wai-Hong Tamfced4f62014-04-17 10:56:30 +08002022 resume_timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002023 else:
2024 new_boot_id = self.get_boot_id()
2025 if new_boot_id != old_boot_id:
2026 raise error.TestFail(
2027 'client rebooted, but sleep was expected'
2028 ' (old boot %s, new boot %s)'
2029 % (old_boot_id, new_boot_id))
2030
2031
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +08002032 def test_wait_for_shutdown(self, shutdown_timeout=None):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002033 """Wait for the client to shut down.
2034
2035 The test for "has shut down" can't distinguish a system that
2036 is merely asleep; to confirm that the unit was down, it is
2037 necessary to force boot, and then call test_wait_for_boot().
2038
2039 This function is expected to be called from a test as part
2040 of a sequence like the following:
2041
2042 ~~~~~~~~
2043 boot_id = host.get_boot_id()
2044 # trigger shutdown on the host
2045 host.test_wait_for_shutdown()
2046 # trigger boot on the host
2047 host.test_wait_for_boot(boot_id)
2048 ~~~~~~~~
2049
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +08002050 @param shutdown_timeout time limit in seconds to allow the host down.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002051 @exception TestFail The host did not shut down within the
2052 allowed time.
2053 """
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +08002054 if shutdown_timeout is None:
2055 shutdown_timeout = self.SHUTDOWN_TIMEOUT
2056
2057 if not self.ping_wait_down(timeout=shutdown_timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002058 raise error.TestFail(
2059 'client failed to shut down after %d seconds' %
Tom Wai-Hong Tamfe005c22014-12-03 09:25:44 +08002060 shutdown_timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002061
2062
2063 def test_wait_for_boot(self, old_boot_id=None):
2064 """Wait for the client to boot from cold power.
2065
2066 The `old_boot_id` parameter should be the value from
2067 `get_boot_id()` obtained prior to shutting down. A
2068 `TestFail` exception is raised if the boot id does not
2069 change. The boot id test is omitted if `old_boot_id` is not
2070 specified.
2071
2072 See @ref test_wait_for_shutdown for more on this function's
2073 usage.
2074
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08002075 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002076 shut down.
2077
2078 @exception TestFail The host did not respond within the
2079 allowed time.
2080 @exception TestFail The host responded, but the boot id test
2081 indicated that there was no reboot.
2082 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07002083 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002084 raise error.TestFail(
2085 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07002086 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002087 elif old_boot_id:
2088 if self.get_boot_id() == old_boot_id:
2089 raise error.TestFail(
2090 'client is back up, but did not reboot'
2091 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07002092
2093
2094 @staticmethod
2095 def check_for_rpm_support(hostname):
2096 """For a given hostname, return whether or not it is powered by an RPM.
2097
Simran Basi1df55112013-09-06 11:25:09 -07002098 @param hostname: hostname to check for rpm support.
2099
Simran Basid5e5e272012-09-24 15:23:59 -07002100 @return None if this host does not follows the defined naming format
2101 for RPM powered DUT's in the lab. If it does follow the format,
2102 it returns a regular expression MatchObject instead.
2103 """
Fang Dengdeba14f2014-11-14 11:54:09 -08002104 m = re.match(CrosHost._RPM_HOSTNAME_REGEX, hostname)
2105 if not m:
2106 return None
2107 try:
2108 lab = int(m.group(1))
2109 row = int(m.group(3))
2110 rack = int(m.group(4))
2111 except (TypeError, ValueError) as e:
2112 return m
2113 if lab == 2 and row>= 1 and row<= 5 and rack>= 1 and rack<= 7:
2114 # TODO(fdeng): temporarily disable support for duts
2115 # behined hydra2 in chromeos2, remove once
2116 # b/17612645 is fixed.
2117 return None
2118 if lab == 4 and (rack == 0 or row == 13):
2119 # TODO(fdeng): disable support for duts behind hydra3
2120 # for chromeos4, remove once b/15410667 is fixed
2121 return None
2122 return m
Simran Basid5e5e272012-09-24 15:23:59 -07002123
2124
2125 def has_power(self):
2126 """For this host, return whether or not it is powered by an RPM.
2127
2128 @return True if this host is in the CROS lab and follows the defined
2129 naming format.
2130 """
Fang Deng0ca40e22013-08-27 17:47:44 -07002131 return CrosHost.check_for_rpm_support(self.hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07002132
2133
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08002134 def _set_power(self, state, power_method):
2135 """Sets the power to the host via RPM, Servo or manual.
2136
2137 @param state Specifies which power state to set to DUT
2138 @param power_method Specifies which method of power control to
2139 use. By default "RPM" will be used. Valid values
2140 are the strings "RPM", "manual", "servoj10".
2141
2142 """
2143 ACCEPTABLE_STATES = ['ON', 'OFF']
2144
2145 if state.upper() not in ACCEPTABLE_STATES:
2146 raise error.TestError('State must be one of: %s.'
2147 % (ACCEPTABLE_STATES,))
2148
2149 if power_method == self.POWER_CONTROL_SERVO:
2150 logging.info('Setting servo port J10 to %s', state)
2151 self.servo.set('prtctl3_pwren', state.lower())
2152 time.sleep(self._USB_POWER_TIMEOUT)
2153 elif power_method == self.POWER_CONTROL_MANUAL:
2154 logging.info('You have %d seconds to set the AC power to %s.',
2155 self._POWER_CYCLE_TIMEOUT, state)
2156 time.sleep(self._POWER_CYCLE_TIMEOUT)
2157 else:
2158 if not self.has_power():
2159 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07002160 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
2161 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
2162 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07002163 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002164
2165
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08002166 def power_off(self, power_method=POWER_CONTROL_RPM):
2167 """Turn off power to this host via RPM, Servo or manual.
2168
2169 @param power_method Specifies which method of power control to
2170 use. By default "RPM" will be used. Valid values
2171 are the strings "RPM", "manual", "servoj10".
2172
2173 """
2174 self._set_power('OFF', power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002175
2176
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08002177 def power_on(self, power_method=POWER_CONTROL_RPM):
2178 """Turn on power to this host via RPM, Servo or manual.
2179
2180 @param power_method Specifies which method of power control to
2181 use. By default "RPM" will be used. Valid values
2182 are the strings "RPM", "manual", "servoj10".
2183
2184 """
2185 self._set_power('ON', power_method)
2186
2187
2188 def power_cycle(self, power_method=POWER_CONTROL_RPM):
2189 """Cycle power to this host by turning it OFF, then ON.
2190
2191 @param power_method Specifies which method of power control to
2192 use. By default "RPM" will be used. Valid values
2193 are the strings "RPM", "manual", "servoj10".
2194
2195 """
2196 if power_method in (self.POWER_CONTROL_SERVO,
2197 self.POWER_CONTROL_MANUAL):
2198 self.power_off(power_method=power_method)
2199 time.sleep(self._POWER_CYCLE_TIMEOUT)
2200 self.power_on(power_method=power_method)
2201 else:
2202 rpm_client.set_power(self.hostname, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -07002203
2204
2205 def get_platform(self):
2206 """Determine the correct platform label for this host.
2207
2208 @returns a string representing this host's platform.
2209 """
2210 crossystem = utils.Crossystem(self)
2211 crossystem.init()
2212 # Extract fwid value and use the leading part as the platform id.
2213 # fwid generally follow the format of {platform}.{firmware version}
2214 # Example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
2215 platform = crossystem.fwid().split('.')[0].lower()
2216 # Newer platforms start with 'Google_' while the older ones do not.
2217 return platform.replace('google_', '')
2218
2219
Hung-ying Tyanb1328032014-04-01 14:18:54 +08002220 def get_architecture(self):
2221 """Determine the correct architecture label for this host.
2222
2223 @returns a string representing this host's architecture.
2224 """
2225 crossystem = utils.Crossystem(self)
2226 crossystem.init()
2227 return crossystem.arch()
2228
2229
Luis Lozano40b7d0d2014-01-17 15:12:06 -08002230 def get_chrome_version(self):
2231 """Gets the Chrome version number and milestone as strings.
2232
2233 Invokes "chrome --version" to get the version number and milestone.
2234
2235 @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
2236 current Chrome version number as a string (in the form "W.X.Y.Z")
2237 and "milestone" is the first component of the version number
2238 (the "W" from "W.X.Y.Z"). If the version number cannot be parsed
2239 in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
2240 of "chrome --version" and the milestone will be the empty string.
2241
2242 """
MK Ryu35d661e2014-09-25 17:44:10 -07002243 version_string = self.run(client_constants.CHROME_VERSION_COMMAND).stdout
Luis Lozano40b7d0d2014-01-17 15:12:06 -08002244 return utils.parse_chrome_version(version_string)
2245
Aviv Keshet74c89a92013-02-04 15:18:30 -08002246 @label_decorator()
Simran Basic6f1f7a2012-10-16 10:47:46 -07002247 def get_board(self):
2248 """Determine the correct board label for this host.
2249
2250 @returns a string representing this host's board.
2251 """
2252 release_info = utils.parse_cmd_output('cat /etc/lsb-release',
2253 run_method=self.run)
2254 board = release_info['CHROMEOS_RELEASE_BOARD']
2255 # Devices in the lab generally have the correct board name but our own
2256 # development devices have {board_name}-signed-{key_type}. The board
2257 # name may also begin with 'x86-' which we need to keep.
Simran Basi833814b2013-01-29 13:13:43 -08002258 board_format_string = ds_constants.BOARD_PREFIX + '%s'
Simran Basic6f1f7a2012-10-16 10:47:46 -07002259 if 'x86' not in board:
Simran Basi833814b2013-01-29 13:13:43 -08002260 return board_format_string % board.split('-')[0]
2261 return board_format_string % '-'.join(board.split('-')[0:2])
Simran Basic6f1f7a2012-10-16 10:47:46 -07002262
2263
Ilja H. Friedel1232e8a2014-06-17 21:30:48 -07002264 @label_decorator('board_freq_mem')
2265 def get_board_with_frequency_and_memory(self):
2266 """
2267 Determines the board name with frequency and memory.
2268
2269 @returns a more detailed string representing the board. Examples are
2270 butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
2271 """
2272 board = self.run(self.poor_mans_rpc(
2273 'get_board_with_frequency_and_memory()')).stdout
2274 return 'board_freq_mem:%s' % str.strip(board)
2275
2276
Aviv Keshet74c89a92013-02-04 15:18:30 -08002277 @label_decorator('lightsensor')
Simran Basic6f1f7a2012-10-16 10:47:46 -07002278 def has_lightsensor(self):
2279 """Determine the correct board label for this host.
2280
2281 @returns the string 'lightsensor' if this host has a lightsensor or
2282 None if it does not.
2283 """
2284 search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
Richard Barnette82c35912012-11-20 10:09:10 -08002285 self._LIGHTSENSOR_SEARCH_DIR, '|'.join(self._LIGHTSENSOR_FILES))
Simran Basic6f1f7a2012-10-16 10:47:46 -07002286 try:
2287 # Run the search cmd following the symlinks. Stderr_tee is set to
2288 # None as there can be a symlink loop, but the command will still
2289 # execute correctly with a few messages printed to stderr.
2290 self.run(search_cmd, stdout_tee=None, stderr_tee=None)
2291 return 'lightsensor'
2292 except error.AutoservRunError:
2293 # egrep exited with a return code of 1 meaning none of the possible
2294 # lightsensor files existed.
2295 return None
2296
2297
Aviv Keshet74c89a92013-02-04 15:18:30 -08002298 @label_decorator('bluetooth')
Simran Basic6f1f7a2012-10-16 10:47:46 -07002299 def has_bluetooth(self):
2300 """Determine the correct board label for this host.
2301
2302 @returns the string 'bluetooth' if this host has bluetooth or
2303 None if it does not.
2304 """
2305 try:
2306 self.run('test -d /sys/class/bluetooth/hci0')
2307 # test exited with a return code of 0.
2308 return 'bluetooth'
2309 except error.AutoservRunError:
2310 # test exited with a return code 1 meaning the directory did not
2311 # exist.
2312 return None
2313
2314
Ilja H. Friedel1232e8a2014-06-17 21:30:48 -07002315 @label_decorator('gpu_family')
2316 def get_gpu_family(self):
2317 """
2318 Determine GPU family.
2319
2320 @returns a string representing the gpu family. Examples are mali, tegra,
2321 pinetrail, sandybridge, ivybridge, haswell and baytrail.
2322 """
2323 gpu_family = self.run(self.poor_mans_rpc('get_gpu_family()')).stdout
2324 return 'gpu_family:%s' % str.strip(gpu_family)
2325
2326
Ilja Friedel0ce0b602013-08-15 18:45:27 -07002327 @label_decorator('graphics')
2328 def get_graphics(self):
2329 """
2330 Determine the correct board label for this host.
2331
2332 @returns a string representing this host's graphics. For now ARM boards
2333 return graphics:gles while all other boards return graphics:gl. This
2334 may change over time, but for robustness reasons this should avoid
2335 executing code in actual graphics libraries (which may not be ready and
2336 is tested by graphics_GLAPICheck).
2337 """
2338 uname = self.run('uname -a').stdout.lower()
2339 if 'arm' in uname:
2340 return 'graphics:gles'
2341 return 'graphics:gl'
2342
2343
Bill Richardson4f595f52014-02-13 16:20:26 -08002344 @label_decorator('ec')
2345 def get_ec(self):
2346 """
2347 Determine the type of EC on this host.
2348
2349 @returns a string representing this host's embedded controller type.
2350 At present, it only returns "ec:cros", for Chrome OS ECs. Other types
2351 of EC (or none) don't return any strings, since no tests depend on
2352 those.
2353 """
2354 cmd = 'mosys ec info'
2355 # The output should look like these, so that the last field should
2356 # match our EC version scheme:
2357 #
2358 # stm | stm32f100 | snow_v1.3.139-375eb9f
2359 # ti | Unknown-10de | peppy_v1.5.114-5d52788
2360 #
2361 # Non-Chrome OS ECs will look like these:
2362 #
2363 # ENE | KB932 | 00BE107A00
2364 # ite | it8518 | 3.08
2365 #
2366 # And some systems don't have ECs at all (Lumpy, for example).
2367 regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'
2368
2369 ecinfo = self.run(command=cmd, ignore_status=True)
2370 if ecinfo.exit_status == 0:
2371 res = re.search(regexp, ecinfo.stdout)
2372 if res:
2373 logging.info("EC version is %s", res.groups()[0])
2374 return 'ec:cros'
2375 logging.info("%s got: %s", cmd, ecinfo.stdout)
2376 # Has an EC, but it's not a Chrome OS EC
2377 return None
2378 logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
2379 # No EC present
2380 return None
2381
2382
Alec Berg31b932b2014-04-04 16:09:11 -07002383 @label_decorator('accels')
2384 def get_accels(self):
2385 """
2386 Determine the type of accelerometers on this host.
2387
2388 @returns a string representing this host's accelerometer type.
2389 At present, it only returns "accel:cros-ec", for accelerometers
2390 attached to a Chrome OS EC, or none, if no accelerometers.
2391 """
2392 # Check to make sure we have ectool
2393 rv = self.run('which ectool', ignore_status=True)
2394 if rv.exit_status:
2395 logging.info("No ectool cmd found, assuming no EC accelerometers")
2396 return None
2397
2398 # Check that the EC supports the motionsense command
2399 rv = self.run('ectool motionsense', ignore_status=True)
2400 if rv.exit_status:
2401 logging.info("EC does not support motionsense command "
2402 "assuming no EC accelerometers")
2403 return None
2404
2405 # Check that EC motion sensors are active
2406 active = self.run('ectool motionsense active').stdout.split('\n')
2407 if active[0] == "0":
2408 logging.info("Motion sense inactive, assuming no EC accelerometers")
2409 return None
2410
2411 logging.info("EC accelerometers found")
2412 return 'accel:cros-ec'
2413
2414
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +08002415 @label_decorator('chameleon')
2416 def has_chameleon(self):
2417 """Determine if a Chameleon connected to this host.
2418
Tom Wai-Hong Tambadbb332014-10-10 02:59:41 +08002419 @returns a list containing two strings ('chameleon' and
2420 'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
2421 has a Chameleon or None if it has not.
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +08002422 """
2423 if self._chameleon_host:
Tom Wai-Hong Tambadbb332014-10-10 02:59:41 +08002424 return ['chameleon', 'chameleon:' + self.chameleon.get_label()]
Tom Wai-Hong Tam3d6790d2014-04-14 16:15:47 +08002425 else:
2426 return None
2427
2428
Derek Basehorec71ff622014-07-07 15:18:40 -07002429 @label_decorator('power_supply')
2430 def get_power_supply(self):
2431 """
2432 Determine what type of power supply the host has
2433
2434 @returns a string representing this host's power supply.
2435 'power:battery' when the device has a battery intended for
2436 extended use
2437 'power:AC_primary' when the device has a battery not intended
2438 for extended use (for moving the machine, etc)
2439 'power:AC_only' when the device has no battery at all.
2440 """
2441 psu = self.run(command='mosys psu type', ignore_status=True)
2442 if psu.exit_status:
2443 # The psu command for mosys is not included for all platforms. The
2444 # assumption is that the device will have a battery if the command
2445 # is not found.
2446 return 'power:battery'
2447
2448 psu_str = psu.stdout.strip()
2449 if psu_str == 'unknown':
2450 return None
2451
2452 return 'power:%s' % psu_str
2453
2454
Puthikorn Voravootivatfa011242014-03-14 18:45:11 -07002455 @label_decorator('storage')
2456 def get_storage(self):
2457 """
2458 Determine the type of boot device for this host.
2459
2460 Determine if the internal device is SCSI or dw_mmc device.
2461 Then check that it is SSD or HDD or eMMC or something else.
2462
2463 @returns a string representing this host's internal device type.
2464 'storage:ssd' when internal device is solid state drive
2465 'storage:hdd' when internal device is hard disk drive
2466 'storage:mmc' when internal device is mmc drive
2467 None When internal device is something else or
2468 when we are unable to determine the type
2469 """
2470 # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
2471 rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
2472 '. /usr/share/misc/chromeos-common.sh;',
2473 'load_base_vars;',
2474 'get_fixed_dst_drive'])
Puthikorn Voravootivat03c51682014-04-24 13:52:12 -07002475 rootdev = self.run(command=rootdev_cmd, ignore_status=True)
2476 if rootdev.exit_status:
2477 logging.info("Fail to run %s", rootdev_cmd)
2478 return None
Puthikorn Voravootivatfa011242014-03-14 18:45:11 -07002479 rootdev_str = rootdev.stdout.strip()
2480
2481 if not rootdev_str:
2482 return None
2483
2484 rootdev_base = os.path.basename(rootdev_str)
2485
2486 mmc_pattern = '/dev/mmcblk[0-9]'
2487 if re.match(mmc_pattern, rootdev_str):
2488 # Use type to determine if the internal device is eMMC or somthing
2489 # else. We can assume that MMC is always an internal device.
2490 type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
Puthikorn Voravootivat03c51682014-04-24 13:52:12 -07002491 type = self.run(command=type_cmd, ignore_status=True)
2492 if type.exit_status:
2493 logging.info("Fail to run %s", type_cmd)
2494 return None
Puthikorn Voravootivatfa011242014-03-14 18:45:11 -07002495 type_str = type.stdout.strip()
2496
2497 if type_str == 'MMC':
2498 return 'storage:mmc'
2499
2500 scsi_pattern = '/dev/sd[a-z]+'
2501 if re.match(scsi_pattern, rootdev.stdout):
2502 # Read symlink for /sys/block/sd* to determine if the internal
2503 # device is connected via ata or usb.
2504 link_cmd = 'readlink /sys/block/%s' % rootdev_base
Puthikorn Voravootivat03c51682014-04-24 13:52:12 -07002505 link = self.run(command=link_cmd, ignore_status=True)
2506 if link.exit_status:
2507 logging.info("Fail to run %s", link_cmd)
2508 return None
Puthikorn Voravootivatfa011242014-03-14 18:45:11 -07002509 link_str = link.stdout.strip()
2510 if 'usb' in link_str:
2511 return None
2512
2513 # Read rotation to determine if the internal device is ssd or hdd.
2514 rotate_cmd = str('cat /sys/block/%s/queue/rotational'
2515 % rootdev_base)
Puthikorn Voravootivat03c51682014-04-24 13:52:12 -07002516 rotate = self.run(command=rotate_cmd, ignore_status=True)
2517 if rotate.exit_status:
2518 logging.info("Fail to run %s", rotate_cmd)
2519 return None
Puthikorn Voravootivatfa011242014-03-14 18:45:11 -07002520 rotate_str = rotate.stdout.strip()
2521
2522 rotate_dict = {'0':'storage:ssd', '1':'storage:hdd'}
2523 return rotate_dict.get(rotate_str)
2524
2525 # All other internal device / error case will always fall here
2526 return None
2527
2528
Dan Shi4e9a2aa2014-03-24 14:28:42 -07002529 @label_decorator('servo')
2530 def get_servo(self):
2531 """Determine if the host has a servo attached.
2532
2533 If the host has a working servo attached, it should have a servo label.
2534
2535 @return: string 'servo' if the host has servo attached. Otherwise,
2536 returns None.
2537 """
2538 return 'servo' if self._servo_host else None
2539
2540
Dan Shi5beba472014-05-28 22:46:07 -07002541 @label_decorator('video_labels')
2542 def get_video_labels(self):
2543 """Run /usr/local/bin/avtest_label_detect to get a list of video labels.
2544
2545 Sample output of avtest_label_detect:
2546 Detected label: hw_video_acc_vp8
2547 Detected label: webcam
2548
2549 @return: A list of labels detected by tool avtest_label_detect.
2550 """
2551 try:
Simran Basi40ca8182014-07-17 18:41:20 -07002552 # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
2553 # landed and supporting images older than the fix is no longer
2554 # necessary.
2555 # Change back to VT1 so avtest_label_detect does not get stuck.
2556 self.run('chvt 1')
Dan Shi5beba472014-05-28 22:46:07 -07002557 result = self.run('/usr/local/bin/avtest_label_detect').stdout
2558 return re.findall('^Detected label: (\w+)$', result, re.M)
2559 except error.AutoservRunError:
2560 # The tool is not installed.
2561 return []
2562
2563
mussa584b4462014-06-20 15:13:28 -07002564 @label_decorator('video_glitch_detection')
2565 def is_video_glitch_detection_supported(self):
2566 """ Determine if a board under test is supported for video glitch
2567 detection tests.
2568
2569 @return: 'video_glitch_detection' if board is supported, None otherwise.
2570 """
2571 parser = ConfigParser.SafeConfigParser()
2572 filename = os.path.join(
2573 common.autotest_dir, 'client/cros/video/device_spec.conf')
2574
2575 dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')
2576
2577 try:
2578 parser.read(filename)
mussa584b4462014-06-20 15:13:28 -07002579 supported_boards = parser.sections()
2580
Mussa83c84d62014-10-02 12:11:28 -07002581 return 'video_glitch_detection' if dut in supported_boards else None
mussa584b4462014-06-20 15:13:28 -07002582
2583 except ConfigParser.error:
2584 # something went wrong while parsing the conf file
2585 return None
2586
Katherine Threlkeld7b97a9f2014-06-24 13:47:14 -07002587 @label_decorator('touch_labels')
2588 def get_touch(self):
2589 """
2590 Determine whether board under test has a touchpad or touchscreen.
2591
2592 @return: A list of some combination of 'touchscreen' and 'touchpad',
2593 depending on what is present on the device.
2594 """
2595 labels = []
2596 input_cmd = '/opt/google/input/inputcontrol --names -t %s'
2597 for elt in ['touchpad', 'touchscreen']:
2598 if self.run(input_cmd % elt).stdout:
2599 labels.append(elt)
2600 return labels
2601
2602
mussa584b4462014-06-20 15:13:28 -07002603
Simran Basic6f1f7a2012-10-16 10:47:46 -07002604 def get_labels(self):
2605 """Return a list of labels for this given host.
2606
2607 This is the main way to retrieve all the automatic labels for a host
2608 as it will run through all the currently implemented label functions.
2609 """
2610 labels = []
Richard Barnette82c35912012-11-20 10:09:10 -08002611 for label_function in self._LABEL_FUNCTIONS:
J. Richard Barnetteb869b222014-09-03 17:55:44 -07002612 try:
2613 label = label_function(self)
2614 except Exception as e:
2615 logging.error('Label function %s failed; ignoring it.',
2616 label_function.__name__)
2617 logging.exception(e)
2618 label = None
Simran Basic6f1f7a2012-10-16 10:47:46 -07002619 if label:
Dan Shi5beba472014-05-28 22:46:07 -07002620 if type(label) is str:
2621 labels.append(label)
2622 elif type(label) is list:
2623 labels.extend(label)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002624 return labels
Dan Shi85276d42014-04-08 22:11:45 -07002625
2626
2627 def is_boot_from_usb(self):
2628 """Check if DUT is boot from USB.
2629
2630 @return: True if DUT is boot from usb.
2631 """
2632 device = self.run('rootdev -s -d').stdout.strip()
2633 removable = int(self.run('cat /sys/block/%s/removable' %
2634 os.path.basename(device)).stdout.strip())
2635 return removable == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002636
2637
2638 def read_from_meminfo(self, key):
Dan Shi49ca0932014-11-14 11:22:27 -08002639 """Return the memory info from /proc/meminfo
Helen Zhang17dae2b2014-11-11 09:25:52 -08002640
2641 @param key: meminfo requested
2642
2643 @return the memory value as a string
2644
2645 """
Helen Zhang17dae2b2014-11-11 09:25:52 -08002646 meminfo = self.run('grep %s /proc/meminfo' % key).stdout.strip()
2647 logging.debug('%s', meminfo)
2648 return int(re.search(r'\d+', meminfo).group(0))