blob: 9319475e50b152df318897dfbe6b02f4f8b6ccfa [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
mussa584b4462014-06-20 15:13:28 -07005import ConfigParser
Aviv Keshet74c89a92013-02-04 15:18:30 -08006import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080011import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070014import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070015
mussa584b4462014-06-20 15:13:28 -070016import common
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070018from autotest_lib.client.common_lib import autotemp
Richard Barnette0c73ffc2012-11-19 15:21:18 -080019from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080022from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080023from autotest_lib.client.common_lib.cros import retry
Dan Shi7dca56e2014-11-11 17:07:56 -080024from autotest_lib.client.common_lib.cros.graphite import es_utils
Michael Liangda8c60a2014-06-03 13:24:51 -070025from autotest_lib.client.common_lib.cros.graphite import stats
MK Ryu35d661e2014-09-25 17:44:10 -070026from autotest_lib.client.cros import constants as client_constants
J. Richard Barnette84890bd2014-02-21 11:05:47 -080027from autotest_lib.client.cros import cros_ui
MK Ryu35d661e2014-09-25 17:44:10 -070028from autotest_lib.server import autoserv_parser
29from autotest_lib.server import autotest
30from autotest_lib.server import constants
31from autotest_lib.server import crashcollect
Dan Shia1ecd5c2013-06-06 11:21:31 -070032from autotest_lib.server import utils as server_utils
Dan Shi9cb0eec2014-06-03 09:04:50 -070033from autotest_lib.server.cros import provision
Scott Zawalski89c44dd2013-02-26 09:28:02 -050034from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070035from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Dan Shi9cb0eec2014-06-03 09:04:50 -070036from autotest_lib.server.cros.faft.config.config import Config as FAFTConfig
Fang Deng96667ca2013-08-01 17:46:18 -070037from autotest_lib.server.hosts import abstract_ssh
Tom Wai-Hong Tamefe1c7f2014-01-02 14:00:11 +080038from autotest_lib.server.hosts import chameleon_host
Fang Deng5d518f42013-08-02 14:04:32 -070039from autotest_lib.server.hosts import servo_host
Simran Basidcff4252012-11-20 16:13:20 -080040from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070041
42
beeps32a63082013-08-22 14:02:29 -070043try:
44 import jsonrpclib
45except ImportError:
46 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070047
Fang Dengd1c2b732013-08-20 12:59:46 -070048
class FactoryImageCheckerException(error.AutoservError):
    """Signals that an image is a factory image."""
52
53
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label-detecting functions.

    @param label_function_list: List that every decorated function is
                                appended to.
    @param label_list: Optional list that collects detectable label
                       strings.  (Default: None)
    @param label: Label string detectable by the decorated function.  It is
                  appended to `label_list` only when it is non-empty and a
                  `label_list` was supplied.  (Default: None)
    @returns A decorator that records the function and hands it back
             unchanged.
    """
    # Both values are fixed for the lifetime of the decorator, so decide
    # once whether the label has to be recorded.
    record_label = bool(label) and label_list is not None

    def register(detector):
        """
        @param detector: The function to be added as a detector.
        """
        label_function_list.append(detector)
        if record_label:
            label_list.append(label)
        return detector

    return register
72
73
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.
    # POWERWASH_BOOT_TIMEOUT: Time to allow for a reboot that
    #   includes powerwash.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    SHUTDOWN_TIMEOUT = 5
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150
    INSTALL_TIMEOUT = 480
    POWERWASH_BOOT_TIMEOUT = 60

    # REBOOT_TIMEOUT: How long to wait for a reboot.
    #
    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    # Set shutdown timeout to account for the time for restarting the UI.
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = cros_ui.RESTART_UI_TIMEOUT

    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value(
            'CROS', 'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # Raw strings keep `\d` a regex escape instead of an (invalid) string
    # escape sequence; the resulting pattern text is unchanged.
    _RPM_HOSTNAME_REGEX = (r'chromeos(\d+)(-row(\d+))?-rack(\d+[a-z]*)'
                           r'-host(\d+)')
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    # Decorator factory pre-bound to the two registries above.
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    _RPM_OUTLET_CHANGED = 'outlet_changed'

    # URL pattern to download firmware image.
    _FW_IMAGE_URL_PATTERN = global_config.global_config.get_config_value(
            'CROS', 'firmware_url_pattern', type=str)

    # File that has a list of directories to be collected
    _LOGS_TO_COLLECT_FILE = os.path.join(
            common.client_dir, 'common_lib', 'logs_to_collect')

    # Prefix of logging message w.r.t. crash collection
    _CRASHLOGS_PREFIX = 'collect_crashlogs'

    # Time duration waiting for host up/down check
    _CHECK_HOST_UP_TIMEOUT_SECS = 15

    # A command that interacts with kernel and hardware (e.g., rm, mkdir, etc)
    # might not be completely done deep through the hardware when the machine
    # is powered down right after the command returns.
    # We should wait for a few seconds to make them done. Fingers crossed.
    _SAFE_WAIT_SECS = 10
190
191
@staticmethod
def check_host(host, timeout=10):
    """
    Probe whether the given host is a chrome-os host.

    The probe succeeds only when /etc/lsb-release mentions CHROMEOS, no
    `adb` binary is found on the host, and /etc/lsb-release does not
    mention moblab.

    @param host: An ssh host representing a device.
    @param timeout: The timeout for the run command.

    @return: True if the probe command exits with status 0; False if it
             exits non-zero or the run/ssh attempt raises.

    """
    probe_cmd = ('grep -q CHROMEOS /etc/lsb-release && '
                 '! which adb >/dev/null 2>&1 && '
                 '! grep -q moblab /etc/lsb-release')
    try:
        outcome = host.run(probe_cmd, ignore_status=True, timeout=timeout)
    except (error.AutoservRunError, error.AutoservSSHTimeout):
        return False
    else:
        return outcome.exit_status == 0
211
212
@staticmethod
def _extract_arguments(args_dict, key_subset):
    """Return the entries of `args_dict` whose keys are in `key_subset`.

    The result holds the standard arguments needed to construct a
    test-assistant object (chameleon or servo) for a host.  The intent
    is to provide standard argument processing from run_remote_tests
    for tests that require a test-assistant board to operate.

    @param args_dict Dictionary from which to extract the arguments.
    @param key_subset Tuple of keys to extract from the args_dict, e.g.
      ('servo_host', 'servo_port').
    @returns A new dictionary; keys missing from `args_dict` are simply
             left out.
    """
    return dict((key, args_dict[key])
                for key in key_subset if key in args_dict)
233
234
@staticmethod
def get_chameleon_arguments(args_dict):
    """Pick the chameleon options out of `args_dict`.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        chameleon_args = hosts.CrosHost.get_chameleon_arguments(args_dict)
        host = hosts.create_host(machine, chameleon_args=chameleon_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the chameleon
      arguments.
    @returns Dictionary holding whichever of `chameleon_host` and
             `chameleon_port` are present in `args_dict`.
    """
    chameleon_keys = ('chameleon_host', 'chameleon_port')
    return CrosHost._extract_arguments(args_dict, chameleon_keys)
251
252
@staticmethod
def get_servo_arguments(args_dict):
    """Pick the servo options out of `args_dict`.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.
    @returns Dictionary holding whichever of `servo_host` and
             `servo_port` are present in `args_dict`.
    """
    servo_keys = ('servo_host', 'servo_port')
    return CrosHost._extract_arguments(args_dict, servo_keys)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700269
J. Richard Barnette964fba02012-10-24 17:34:29 -0700270
def _initialize(self, hostname, chameleon_args=None, servo_args=None,
                ssh_verbosity_flag='', ssh_options='',
                *args, **dargs):
    """Initialize superclasses, |self.chameleon|, and |self.servo|.

    This method checks whether a chameleon/servo (aka
    test-assistant objects) is required by checking whether
    chameleon_args/servo_args is None. This method will only
    attempt to create the test-assistant object when it is
    required by the test.

    For creating the test-assistant object, there are three
    possibilities: First, if the host is a lab system known to have
    a test-assistant board, we connect to that board unconditionally.
    Second, if we're called from a control file that requires
    test-assistant features for testing, it will pass settings from
    the arguments, like `servo_host`, `servo_port`. If neither of
    these cases apply, the test-assistant object will be `None`.

    @param hostname: Host name forwarded to the superclass initializer.
    @param chameleon_args: Arguments handed to
                           chameleon_host.create_chameleon_host().
    @param servo_args: Arguments handed to
                       servo_host.create_servo_host().
    @param ssh_verbosity_flag: Stored as self._ssh_verbosity_flag.
    @param ssh_options: Stored as self._ssh_options.

    """
    super(CrosHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options

    # TODO(fdeng): We need to simplify the
    # process of servo and servo_host initialization.
    # crbug.com/298432
    self._servo_host = servo_host.create_servo_host(
            dut=self.hostname, servo_args=servo_args)
    # TODO(waihong): Do the simplification on Chameleon too.
    self._chameleon_host = chameleon_host.create_chameleon_host(
            dut=self.hostname, chameleon_args=chameleon_args)

    # Both host objects are created before either assistant is fetched.
    self.servo = (self._servo_host.get_servo()
                  if self._servo_host is not None else None)
    self.chameleon = (self._chameleon_host.create_chameleon_board()
                      if self._chameleon_host else None)
Fang Deng5d518f42013-08-02 14:04:32 -0700319
320
def get_repair_image_name(self):
    """Build the repair image name from global config values.

    Reads `stable_cros_version` and `stable_build_pattern` from the
    CROS config section and fills the pattern with this host's board
    and the stable version.

    @returns a str of $board-version/$BUILD.
    @raises error.AutoservError if the board lookup returns None.

    """
    cros_config = global_config.global_config
    stable_version = cros_config.get_config_value(
            'CROS', 'stable_cros_version')
    build_pattern = cros_config.get_config_value(
            'CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
336
337
def _host_in_AFE(self):
    """Ask the AFE for the host entries matching this host's name.

    Callers use the truthiness of the result to decide whether the AFE
    knows this host.

    @returns whatever `self._AFE.get_hosts()` returns for this hostname.
    """
    afe = self._AFE
    return afe.get_hosts(hostname=self.hostname)
344
345
def lookup_job_repo_url(self):
    """Look up the job_repo_url attribute the AFE holds for this host.

    @returns job_repo_url from the AFE, or None if the AFE returns no
             entry for this host or the first entry has no job_repo_url
             attribute.

    """
    # _host_in_AFE() already performs the get_hosts() query for this
    # hostname; reuse its result instead of issuing the same RPC twice.
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700359
360
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE returns no entry for this host.
    """
    if not self._host_in_AFE():
        return

    # Every label starting with the version prefix that is attached to
    # this host gets detached from it.
    version_labels = self._AFE.get_labels(
            name__startswith=ds_constants.VERSION_PREFIX,
            host__hostname=self.hostname)
    for version_label in version_labels:
        version_label.remove_hosts(hosts=[self.hostname])

    # Passing None for both arguments resets job_repo_url to None.
    self.update_job_repo_url(None, None)
375
376
def update_job_repo_url(self, devserver_url, image_name):
    """
    Update the job_repo_url host attribute and check that it stuck.

    When either argument is empty the attribute is set to None.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    # Read the attribute back to confirm the AFE stored the new value.
    if self.lookup_job_repo_url() == repo_url:
        return
    raise error.AutoservError('Failed to update job_repo_url with %s, '
                              'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500394
395
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Attach the cros_version label and set job_repo_url for an image.

    Does nothing when the AFE returns no entry for this host.  The
    label is created in the AFE if it does not exist yet.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=cros_label)
    label = existing[0] if existing else self._AFE.create_label(
            name=cros_label)

    label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
417
418
def verify_job_repo_url(self, tag=''):
    """
    Stage the autotest packages named by this host's job_repo_url.

    Looks up the host's job_repo_url, splits it into a devserver URL and
    an image name, asks that devserver to stage the `autotest_packages`
    artifact for the image, and reports how long the staging call took
    as a `verify_job_repo_url` gauge.  If the host has no job_repo_url a
    warning is logged and nothing else happens.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/
    lumpy-release/R29-4279.0.0/autotest/packages" refers to the autotest
    package for lumpy-release/R29-4279.0.0.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Not used by this method.

    NOTE(review): exceptions from the AFE lookup and the devserver calls
    propagate to the caller.  Earlier documentation listed
    DevServerException, AutoservError and urllib2.URLError -- confirm
    against the helpers before relying on specific types.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
            job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    start_time = time.time()
    ds.stage_artifacts(image_name, ['autotest_packages'])
    stage_time = time.time() - start_time

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = server_utils.ParseBuildName(
                image_name)[:3]
    except server_utils.ParseBuildNameException:
        # Image names that cannot be parsed are simply not reported.
        return

    # Isolate the host part of the devserver URL: drop the scheme, any
    # path, and a trailing ":port".  Slicing between the first '/' and
    # the last ':' produced an empty name for URLs without a port.
    netloc = devserver_url.split('//', 1)[-1].split('/', 1)[0]
    devserver = netloc.rsplit(':', 1)[0]
    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        # Dots would otherwise add extra levels to the stats key.
        'devserver': devserver.replace('.', '_'),
    }
    stats.Gauge('verify_job_repo_url').send(
            '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
            stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700480
Scott Zawalskieadbf702013-03-14 09:23:06 -0400481
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    @param update_url: url of the image.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update; False if
        the stateful update was skipped, failed, or left any of the
        checked folders un-rebuilt.

    """
    # TODO(jrbarnette): Yes, I hate this re.match() test case.
    # It's better than the alternative: see crbug.com/360944.
    image_name = autoupdater.url_to_image_name(update_url)
    release_pattern = r'^.*-release/R[0-9]+-[0-9]+\.[0-9]+\.0$'
    if not re.match(release_pattern, image_name):
        return False
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False

    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    # A rebuilt folder no longer contains its marker.  `test -f` exits 0
    # only when the marker survived, i.e. the folder was NOT rebuilt.
    # The command must not be followed by `echo $?`: that makes the exit
    # status always 0 and turns this verification into a no-op.
    for marker_path in marker_paths:
        result = self.run('test -f %s' % marker_path, ignore_status=True)
        if result.exit_status == 0:
            return False
    return True
532
533
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.
    @raises autoupdater.ChromiumOSError if the DUT is not running the
        build that was meant to be installed.

    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # The two message fragments need an explicit separating space;
    # without it the text read "...to previousbuild".
    rollback_message = (
            'Build %s failed to boot on %s; system rolled back to previous '
            'build' % (updater.update_version, self.hostname))
    updater.verify_boot_expectations(
            expected_kernel, rollback_message=rollback_message)

    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        raise autoupdater.ChromiumOSError(
            'Failed to update %s to build %s; found build '
            '%s instead' % (self.hostname,
                            updater.update_version,
                            updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800559
560
def _stage_image_for_update(self, image_name=None):
    """Trigger a devserver download of a build and return its update URL.

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       empty, the repair image name is used instead.
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)
    image_server = dev_server.ImageServer.resolve(build)
    image_server.trigger_download(build, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (image_server.url(), build)
574
575
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       empty, the repair image name is used instead.
    @returns the value of the devserver's get_test_image_url() for the
             image.
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)
    image_server = dev_server.ImageServer.resolve(build)
    image_server.stage_artifacts(build, ['test_image'])
    return image_server.get_test_image_url(build)
589
590
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory build on a devserver and return its image URL.

    @param image_name: a name like <board>/4262.204.0

    @return: An update URL, eg:
        http://<devserver>/static/canary-channel/
        <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
        None is returned (after logging an error) when no image_name is
        given.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return

    cros_config = global_config.global_config
    factory_artifact = cros_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not factory_artifact:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    image_server.stage_artifacts(image_name, [factory_artifact],
                                 archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
623
624
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    A stateful update is attempted first; it does not update the kernel
    and tends to run much faster than a full reimage. If the stateful
    update is not applied, or raises, a full update (including the
    kernel) is run instead.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install). Both are added back after a successful update.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None, the image given on the command line
            (self._parser.options.image) is used: directly when it
            starts with 'http://', otherwise after staging it on a
            devserver. If neither is given and repair is True, the
            repair image is staged and installed.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like rebooting first and
            restarting update-engine. Setting repair to True sets
            force_update to True as well. default: False
    @raises autoupdater.ChromiumOSError

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        requested = self._parser.options.image
        if requested and requested.startswith('http://'):
            update_url = requested
            logging.debug('update url is retrieved from requested_build'
                          ': %s', update_url)
        elif requested:
            # Any build that is not an http:// URL gets staged on the
            # devservers defined in global_config.ini.
            update_url = self._stage_image_for_update(requested)
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        elif repair:
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # The system may be in a bad state, so always reboot before
        # installing and give update-engine a fresh start.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    au_updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    updated = False
    # Drop the cros-version labels and the job_repo_url host attribute.
    self.clear_cros_version_labels_and_job_repo_url()

    # Cheap path first: stateful update only.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, au_updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as stateful_error:
        logging.exception(stateful_error)
        logging.warning('Failed to stateful update DUT, force to update.')

    inactive_kernel = None
    if not updated:
        # Full update. Reboot first in case the system is in a bad state.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        for command in ('rm -rf /var/lib/whitelist',
                        'mkdir /var/lib/whitelist',
                        'chmod -w /var/lib/whitelist',
                        'stop update-engine; start update-engine'):
            self.run(command)

        if au_updater.run_update(force_update):
            updated = True
            active_kernel, inactive_kernel = au_updater.get_kernel_state()

            # The freshly written (inactive) kernel must not rank below
            # the currently active one.
            inactive_priority = au_updater.get_kernel_priority(
                    inactive_kernel)
            active_priority = au_updater.get_kernel_priority(active_kernel)
            if inactive_priority < active_priority:
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(au_updater, inactive_kernel)
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    logging.debug('Cleaning up old autotest directories.')
    try:
        autodir = autotest.Autotest.get_installed_autodir(self)
        self.run('rm -rf ' + autodir)
    except autotest.AutodirNotFoundError:
        logging.debug('No autotest installed directory found.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700745
746
def _clear_fw_version_labels(self):
    """Remove this host from every firmware version label it carries.

    Labels are looked up in the AFE by the provision.FW_VERSION_PREFIX
    name prefix, restricted to this host's hostname.
    """
    fw_labels = self._AFE.get_labels(
            name__startswith=provision.FW_VERSION_PREFIX,
            host__hostname=self.hostname)
    for fw_label in fw_labels:
        fw_label.remove_hosts(hosts=[self.hostname])
754
755
def _add_fw_version_label(self, build):
    """Attach the firmware version label for `build` to this host.

    The label is created first if it does not exist yet.

    @param build: Build of firmware.

    """
    label_name = provision.fw_version_to_label(build)
    provision.ensure_label_exists(label_name)
    # Take the first AFE label whose name starts with the label name.
    matching_labels = self._AFE.get_labels(name__startswith=label_name)
    matching_labels[0].add_hosts([self.hostname])
766
767
def firmware_install(self, build=None):
    """Install EC and AP (BIOS) firmware on the DUT through its servo.

    The 'firmware' artifact of the build is staged on a devserver, then
    downloaded and unpacked into a temporary directory. The EC image and
    the AP image are programmed via servo, and the DUT is reset.

    Once a host enters firmware_install its fw_version label will be
    removed. After the firmware is updated successfully, a new fw_version
    label for `build` will be added to the host.

    @param build: The build version to which we want to provision the
                  firmware of the machine,
                  e.g. 'link-firmware/R22-2695.1.144'.
                  If not set, repair mode is assumed and the build
                  returned by self.get_repair_image_name() is used.

    @raises error.TestError if the host does not have a servo.

    TODO(dshi): After bug 381718 is fixed, update here with corresponding
                exceptions that could be raised.

    """
    if not self.servo:
        raise error.TestError('Host %s does not have servo.' %
                              self.hostname)

    # TODO(fdeng): use host.get_board() after
    # crbug.com/271834 is fixed.
    board = self._get_board_from_afe()

    # If build is not set, assume it's repair mode and try to install
    # firmware from stable CrOS.
    if not build:
        build = self.get_repair_image_name()

    # The AP image file name inside the firmware tarball depends on the
    # platform's bootloader.
    config = FAFTConfig(board)
    if config.use_u_boot:
        ap_image = 'image-%s.bin' % board
    else: # Depthcharge platform
        ap_image = 'image.bin'
    ec_image = 'ec.bin'
    ds = dev_server.ImageServer.resolve(build)
    ds.stage_artifacts(build, ['firmware'])

    tmpd = autotemp.tempdir(unique_id='fwimage')
    try:
        fwurl = self._FW_IMAGE_URL_PATTERN % (ds.url(), build)
        local_tarball = os.path.join(tmpd.name, os.path.basename(fwurl))
        server_utils.system('wget -O %s %s' % (local_tarball, fwurl),
                            timeout=60)
        server_utils.system('tar xf %s -C %s %s %s' %
                            (local_tarball, tmpd.name, ap_image, ec_image),
                            timeout=60)
        # Extracting dts/* is best effort: a failure here is ignored.
        server_utils.system('tar xf %s --wildcards -C %s "dts/*"' %
                            (local_tarball, tmpd.name),
                            timeout=60, ignore_status=True)

        self._clear_fw_version_labels()
        logging.info('Will re-program EC now')
        self.servo.program_ec(os.path.join(tmpd.name, ec_image))
        logging.info('Will re-program BIOS now')
        self.servo.program_bios(os.path.join(tmpd.name, ap_image))
        self.servo.get_power_state_controller().reset()
        time.sleep(self.servo.BOOT_DELAY)
        # _add_fw_version_label() requires the build; calling it without
        # the argument raised TypeError after the firmware was already
        # flashed, leaving the host without any fw_version label.
        self._add_fw_version_label(build)
    finally:
        tmpd.clean()
834
835
def show_update_engine_log(self):
    """Dump the update engine log of the host by running `cat` on it."""
    log_path = client_constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
Dan Shi10e992b2013-08-30 11:02:59 -0700840
841
def _get_board_from_afe(self):
    """Look up this host's board through its labels in the AFE.

    Delegates to server_utils.get_board_from_afe(), which looks for a
    host label of the form "board:<board>" and yields the "<board>"
    part. `None` is returned if there is not a single, unique label
    matching the pattern.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800852
853
def get_build(self):
    """Look up the current build of this Host in the AFE.

    Delegates to server_utils.get_build_from_afe(), which goes through
    this host's labels in the AFE to determine its build.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800863
864
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    If the host is up, re-install the DUT in repair mode, i.e. with the
    stable "repair" version of Chrome OS that machine_install() picks
    when called with repair=True.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the failure, then let the caller see the original error.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500885
886
def _install_repair_with_powerwash(self):
    """Powerwash the DUT, then repair it using update-engine.

    update-engine may fail due to a bad image. In such case, powerwash
    may help to cleanup the DUT for update-engine to work again.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises AutoservRepairFailure if the DUT does not come back after
            the powerwash reboot.
    @raises ChromiumOSError if the install failed for some reason.

    """
    if not self.is_up():
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to powerwash the DUT.')
    # Write the reset request to the stateful partition, then reboot.
    powerwash_command = ('echo "fast safe" > '
                         '/mnt/stateful_partition/factory_install_reset')
    self.run(powerwash_command)
    self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)

    came_back = self.is_up()
    if not came_back:
        logging.error('Powerwash failed. DUT did not come back after '
                      'reboot.')
        raise error.AutoservRepairFailure(
                'DUT failed to boot from powerwash after %d seconds' %
                self.POWERWASH_BOOT_TIMEOUT)

    logging.info('Powerwash succeeded.')
    self._install_repair()
913
914
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """
    Re-install the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    Both the USB boot phase and the install phase are timed with a
    stats.Timer whose key includes the respective timeout value.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.
    """
    # Phase 1: put the image on the USB stick and boot from it.
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = stats.Timer(
            'servo_install.usb_boot_timeout_%s' % usb_boot_timeout)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    # Phase 2: install from the booted USB image onto the DUT.
    install_timer = stats.Timer(
            'servo_install.install_timeout_%s' % install_timeout)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    install_command = ('chromeos-install --yes --lab_preserve_logs=%s' %
                       self._LOGS_TO_COLLECT_FILE)
    self.run(install_command, timeout=install_timeout)
    install_timer.stop()

    # Phase 3: power cycle into the freshly installed image.
    logging.info('Power cycling DUT through servo.')
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    if not self.wait_up(timeout=self.BOOT_TIMEOUT):
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
970
971
def _servo_repair_reinstall(self):
    """Reinstall the DUT utilizing servo and a test image.

    Stages the repair test image with stage_image_for_servo() and hands
    its URL to servo_install(), which re-installs the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the Servo
            board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_image_url = self.stage_image_for_servo()
    self.servo_install(staged_image_url)
994
995
def _servo_repair_power(self):
    """Attempt to repair DUT using an attached Servo.

    Powers the DUT off and back on through the servo's power state
    controller, then waits up to BOOT_TIMEOUT for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()

    if not self.wait_up(self.BOOT_TIMEOUT):
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -08001017
1018
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is powercycled and given BOOT_TIMEOUT to come up. If it
    does not, the powercycle is repeated, relying on the auto fallback
    to the last good install, until the host is up or
    _MAX_POWER_CYCLE_ATTEMPTS cycles have failed.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host does not come back within
            the allowed number of powercycles.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
1049
1050
def _reboot_repair(self):
    """Reboot this host, provided it is currently reachable.

    @raises AutoservRepairMethodNA if the host is not up within
            _CHECK_HOST_UP_TIMEOUT_SECS.
    """
    reachable = self.is_up(self._CHECK_HOST_UP_TIMEOUT_SECS)
    if not reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for reboot.')

    logging.info('Attempting repair via SSH reboot.')
    self.reboot(timeout=self.BOOT_TIMEOUT, wait=True)
1057
1058
def check_device(self):
    """Ping the device over ssh, then clean it up and verify it.

    The three steps run in this order; the first one that raises ends
    the check and its exception reaches the caller.

    @raise AutoservSSHTimeout: If the ssh ping times out.
    @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                             permissions.
    @raise AutoservSshPingHostError: For other AutoservRunErrors during
                                     ssh_ping.
    @raises AutoservError: As appropriate, during cleanup and verify.
    """
    # Reachability first: cleanup and verify need a working ssh link.
    self.ssh_ping()

    self.cleanup()

    self.verify()
1072
1073
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If the host has a servo host but no servo object yet, the servo
    host is repaired first (failures are only logged) and the servo is
    then fetched from it.

    The repair procedures below are tried in order. After each of them
    crash logs are collected if needed and the host is checked with
    `self.check_device()`. The first procedure for which both the repair
    and the check succeed ends the repair.

    Escalation order of repair procedures from less intrusive to
    more intrusive repairs:
      1. SSH to the DUT and reboot.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.
      4. Try to re-install to a known stable image using
         auto-update.
      5. Powerwash the DUT, then try the auto-update re-install again.
      6. If there's a servo for the DUT, try to re-install via
         the servo.

    Every outcome (SUCCEEDED, RepairNA, FAILED) is counted in stats per
    repair procedure, and additionally per board when the board is known.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.

    """
    # Caution: Deleting shards relies on repair to always reboot the DUT.

    if self._servo_host and not self.servo:
        try:
            self._servo_host.repair_full()
        except Exception as servo_error:
            logging.error('Could not create a healthy servo: %s',
                          servo_error)
        self.servo = self._servo_host.get_servo()

    self.try_collect_crashlogs()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [
            self._reboot_repair,
            self._servo_repair_power,
            self._powercycle_to_repair,
            self._install_repair,
            self._install_repair_with_powerwash,
            self._servo_repair_reinstall,
    ]
    errors = []
    board = self._get_board_from_afe()

    def _bump_counters(name_fmt, board_fmt, func_name):
        """Increment the per-procedure and, if known, per-board counter."""
        stats.Counter(name_fmt % func_name).increment()
        if board:
            stats.Counter(board_fmt % (func_name, board)).increment()

    for repair_func in repair_funcs:
        try:
            repair_func()
            self.try_collect_crashlogs()
            self.check_device()
            _bump_counters('%s.SUCCEEDED', '%s.%s.SUCCEEDED',
                           repair_func.__name__)
            return
        except error.AutoservRepairMethodNA as e:
            # This procedure does not apply to the host; try the next.
            _bump_counters('%s.RepairNA', '%s.%s.RepairNA',
                           repair_func.__name__)
            logging.warning('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            _bump_counters('%s.FAILED', '%s.%s.FAILED',
                           repair_func.__name__)
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    # Every procedure was either not applicable or failed.
    stats.Counter('Full_Repair_Failed').increment()
    if board:
        stats.Counter('Full_Repair_Failed.%s' % board).increment()
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001173
1174
def try_collect_crashlogs(self, check_host_up=True):
    """
    Collect crash logs from the host if a crashed job is recorded for it.

    Nothing is collected when no crashed job is recorded, or when
    check_host_up is set and the host is not up within
    _CHECK_HOST_UP_TIMEOUT_SECS. Any exception is logged and suppressed.

    @param check_host_up: Flag for checking host is up. Default is True.
    """
    try:
        crash_job = self._need_crash_logs()
        if not crash_job:
            return

        logging.debug('%s: Job %s was crashed', self._CRASHLOGS_PREFIX,
                      crash_job)
        if check_host_up and not self.is_up(
                self._CHECK_HOST_UP_TIMEOUT_SECS):
            return

        self._collect_crashlogs(crash_job)
        logging.debug('%s: Completed collecting logs for the '
                      'crashed job %s', self._CRASHLOGS_PREFIX,
                      crash_job)
    except Exception as collect_error:
        # Exception should not result in repair failure.
        # Therefore, suppress all exceptions here.
        logging.error('%s: Failed while trying to collect crash-logs: %s',
                      self._CRASHLOGS_PREFIX, collect_error)
1198
1199
def _need_crash_logs(self):
    """Read the need_crash_logs attribute of this host from the AFE.

    At most one such attribute is expected per host.

    @return: Value string of need_crash_logs attribute
             None if there is no need_crash_logs attribute
    """
    matches = self._AFE.get_host_attribute(
            constants.CRASHLOGS_HOST_ATTRIBUTE, hostname=self.hostname)
    assert len(matches) < 2
    if matches:
        return matches[0].value
    return None
1210
1211
def _collect_crashlogs(self, job_id):
    """Grab logs from the host where a job was crashed.

    If PRIOR_LOGS_DIR exists on the host, its content is collected.
    Otherwise, if the lab-machine marker (_LAB_MACHINE_FILE) exists on
    the host, the host was repaired automatically and the normal system
    logs are collected. If neither exists, nothing is collected.

    In all three cases a marker file holding the job id is written to
    the crash-logs directory and the need_crash_logs host attribute is
    removed, which makes crash collection a one-time effort.

    @param job_id: Id of the job that was crashed.
    """
    crashlogs_dir = crashcollect.get_crashinfo_dir(
            self, constants.CRASHLOGS_DEST_DIR_PREFIX)

    had_prior_logs = self.path_exists(client_constants.PRIOR_LOGS_DIR)
    if had_prior_logs:
        self._collect_prior_logs(crashlogs_dir)
    elif self.path_exists(self._LAB_MACHINE_FILE):
        self._collect_system_logs(crashlogs_dir)
    else:
        logging.warning('%s: Host was manually re-installed without '
                        '--lab_preserve_log option. Skip collecting '
                        'crash-logs.', self._CRASHLOGS_PREFIX)

    # Crash collection is attempted exactly once.
    # _collect_prior_logs() and _collect_system_logs() do not throw, so
    # the code below runs even when they fail. This method is only
    # called while the host is up (see try_collect_crashlogs()), where
    # such failures are assumed to be rare. It is also unclear how often
    # collection could be retried without triggering another repair
    # unnecessarily.

    # Create a marker file as soon as log collection is done.
    # Leave the job id to this marker for gs_offloader to consume.
    marker_path = os.path.join(crashlogs_dir, constants.CRASHLOGS_MARKER)
    with open(marker_path, 'a') as marker:
        marker.write('%s\n' % job_id)

    # Remove need_crash_logs attribute
    logging.debug('%s: Remove attribute need_crash_logs from host %s',
                  self._CRASHLOGS_PREFIX, self.hostname)
    self._AFE.set_host_attribute(constants.CRASHLOGS_HOST_ATTRIBUTE,
                                 None, hostname=self.hostname)

    if not had_prior_logs:
        return

    logging.debug('%s: Remove %s from host %s', self._CRASHLOGS_PREFIX,
                  client_constants.PRIOR_LOGS_DIR, self.hostname)
    self.run('rm -rf %s; sync' % client_constants.PRIOR_LOGS_DIR)
    # Wait for a few seconds to make sure the prior command is
    # done deep through storage.
    time.sleep(self._SAFE_WAIT_SECS)
1268
1269
def _collect_prior_logs(self, crashlogs_dir):
    """Grab prior logs that were stashed before re-installing a host.

    A failure to collect is logged and not propagated.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    logging.debug('%s: Found %s, collecting them...',
                  self._CRASHLOGS_PREFIX, client_constants.PRIOR_LOGS_DIR)
    try:
        self.collect_logs(client_constants.PRIOR_LOGS_DIR,
                          crashlogs_dir,
                          False)
        logging.debug('%s: %s is collected',
                      self._CRASHLOGS_PREFIX,
                      client_constants.PRIOR_LOGS_DIR)
    except Exception as collect_error:
        logging.error('%s: Failed to collect %s: %s',
                      self._CRASHLOGS_PREFIX,
                      client_constants.PRIOR_LOGS_DIR,
                      collect_error)
1286
1287
def _collect_system_logs(self, crashlogs_dir):
    """Copy the configured system log paths off the host.

    The list of paths comes from parsing self._LOGS_TO_COLLECT_FILE.
    Paths that do not exist on the host are skipped. A failure on one
    path is logged and does not stop collection of the remaining ones.

    @param crashlogs_dir: Directory path where crash-logs are stored.
    """
    prefix = self._CRASHLOGS_PREFIX
    logging.debug('%s: Found %s, collecting system logs...',
                  prefix, self._LAB_MACHINE_FILE)
    for src in server_utils.parse_simple_config(self._LOGS_TO_COLLECT_FILE):
        try:
            if not self.path_exists(src):
                continue
            logging.debug('%s: Collecting %s...', prefix, src)
            dest = server_utils.concat_path_except_last(crashlogs_dir, src)
            self.collect_logs(src, dest, False)
            logging.debug('%s: %s is collected', prefix, src)
        except Exception as e:
            # Deliberately broad: one bad path must not abort the rest.
            logging.error('%s: Failed to collect %s: %s', prefix, src, e)
1309
1310
def close(self):
    """Disconnect every known RPC proxy, then run the parent close()."""
    # RPC proxies are torn down first, before the parent class's close()
    # runs.
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001314
1315
def get_power_supply_info(self):
    """Get the output of power_supply_info.

    power_supply_info outputs the info of each power supply, e.g.,
    Device: Line Power
      online:                  no
      type:                    Mains
      voltage (V):             0
      current (A):             0
    Device: Battery
      state:                   Discharging
      percentage:              95.9276
      technology:              Li-ion

    Above output shows two devices, Line Power and Battery, with details
    of each device listed. This function parses the output into a
    dictionary, with key being the device name, and value being a
    dictionary of details of the device info.

    Each line is split on its first colon only, so a value that itself
    contains a colon is kept intact. Lines without any colon are ignored.

    @return: The dictionary of power_supply_info, e.g.,
             {'Line Power': {'online': 'yes', 'type': 'main'},
              'Battery': {'vendor': 'xyz', 'percentage': '100'}}
    """
    output = self.run('power_supply_info').stdout.strip()
    info = {}
    device_name = None
    device_info = {}
    for line in output.split('\n'):
        key, separator, value = line.partition(':')
        if not separator:
            # Not a "key: value" line.
            continue
        key = key.strip()
        value = value.strip()
        if key == 'Device':
            # A new device section starts; flush the previous one.
            if device_name:
                info[device_name] = device_info
            device_name = value
            device_info = {}
        else:
            device_info[key] = value
    # Flush the last section, which has no following 'Device' line.
    if device_name and device_name not in info:
        info[device_name] = device_info
    return info
1357
1358
def get_battery_percentage(self):
    """Get the battery percentage.

    @return: The percentage of battery level, value range from 0-100.
             Return None if the battery info cannot be retrieved, i.e.
             the 'Battery' device or its 'percentage' field is missing,
             or the field is not a valid number.
    """
    try:
        info = self.get_power_supply_info()
        logging.info(info)
        return float(info['Battery']['percentage'])
    # The tuple is required: `except KeyError, ValueError:` would catch
    # only KeyError and bind the exception to the name ValueError.
    except (KeyError, ValueError):
        return None
1371
1372
def is_ac_connected(self):
    """Report whether power_supply_info shows line power as online.

    @return: True if the 'Line Power' device reports online == 'yes'.
             False if it reports anything else, or if the device or
             its 'online' field is missing.
    """
    try:
        line_power = self.get_power_supply_info()['Line Power']
        online = line_power['online']
    except KeyError:
        return False
    return online == 'yes'
1383
1384
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE record for this host carries the
    self._RPM_OUTLET_CHANGED attribute. Otherwise powers the host on and
    clears the attribute.

    If power-on raises rpm_client.RemotePowerException, the failure is
    logged and posted as an 'RPM_poweron_failure' record. The exception
    is re-raised only when the battery level is unknown, zero, or below
    50. Otherwise it is suppressed, and the attribute is cleared when
    the power adapter is connected.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts:
        return
    if self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
        afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                               hostname=self.hostname)
    except rpm_client.RemotePowerException:
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
        es_utils.ESMetadata().post(
                type_str='RPM_poweron_failure',
                metadata={'hostname': self.hostname})

        battery_percentage = self.get_battery_percentage()
        if not battery_percentage or battery_percentage < 50:
            # Bare raise: propagate the RemotePowerException caught above.
            raise
        if self.is_ac_connected():
            logging.info('The device has power adapter connected and '
                         'charging. No need to try to turn RPM on '
                         'again.')
            afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                                   hostname=self.hostname)
        logging.info('Battery level is now at %s%%. The device may '
                     'still have enough power to run test, so no '
                     'exception will be raised.', battery_percentage)
1417
Simran Basi5e6339a2013-03-21 11:34:32 -07001418
def _is_factory_image(self):
    """Check whether the DUT is running a factory image.

    The check is a shell test for the file /root/.factory_test on the
    host; a non-zero exit status is tolerated rather than raised.

    @return: True if the image on the DUT is a factory image.
             False otherwise.
    """
    probe = self.run('[ -f /root/.factory_test ]', ignore_status=True)
    marker_present = (probe.exit_status == 0)
    return marker_present
1427
1428
def _restart_ui(self):
    """Restart the Chrome UI.

    The login prompt state is captured before the ui job is restarted,
    and is then handed to cros_ui.wait_for_chrome_ready().

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    # TODO(jrbarnette): The command to stop/start the ui job
    # should live inside cros_ui, too.  However that would seem
    # to imply interface changes to the existing start()/restart()
    # functions, which is a bridge too far (for now).
    login_prompt_state = cros_ui.get_login_prompt_state(self)
    self.run('stop ui; start ui')
    cros_ui.wait_for_chrome_ready(login_prompt_state, self)
beepsc87ff602013-07-31 21:53:00 -07001448
1449
def cleanup(self):
    """Clean up the host between tests.

    Removes the CLEANUP_LOGS_PAUSED_FILE marker, then tries to restart
    the UI. If the restart fails with one of the expected errors, falls
    back to the parent class cleanup. Finally, when the host has power
    control, makes sure its outlet power is restored.
    """
    self.run('rm -f %s' % client_constants.CLEANUP_LOGS_PAUSED_FILE)
    ui_restart_errors = (error.AutotestRunError, error.AutoservRunError,
                         FactoryImageCheckerException)
    try:
        self._restart_ui()
    except ui_restart_errors:
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()
    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001463
1464
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    Defaults filled in when the caller did not supply them:
      - reboot_cmd: a backgrounded `reboot`, followed by `reboot -f`
        after `reboot_timeout` seconds (10 if not given).
      - fastsync: True, to avoid extra sync commands before reboot.
    The board name, with everything up to the first ':' stripped, is
    always passed on as `board` for logging reboot times.

    @param dargs: Keyword arguments forwarded to the parent reboot().
    """
    if 'reboot_cmd' not in dargs:
        delay_before_force = dargs.get('reboot_timeout', 10)
        dargs['reboot_cmd'] = ('((reboot & sleep %d; reboot -f &)'
                               ' </dev/null >/dev/null 2>&1 &)' %
                               delay_before_force)
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)

    # For purposes of logging reboot times:
    # Get the board name i.e. 'daisy_spring'
    board_fullname = self.get_board()

    # Keep what follows the first ':'; the whole name if there is none.
    dargs['board'] = board_fullname.split(':', 1)[-1]
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001489
1490
def suspend(self, **dargs):
    """Suspend the site host.

    `suspend_time` (60 if not given) is always copied into `timeout`.
    Unless the caller supplies `suspend_cmd`, a backgrounded command is
    built that clears the RTC wake alarm, re-arms it `suspend_time`
    seconds ahead, and then calls powerd_dbus_suspend.

    @param dargs: Keyword arguments forwarded to the parent suspend().
    """
    suspend_time = dargs.get('suspend_time', 60)
    dargs['timeout'] = suspend_time
    if 'suspend_cmd' not in dargs:
        steps = [
            'echo 0 > /sys/class/rtc/rtc0/wakealarm',
            'echo +%d > /sys/class/rtc/rtc0/wakealarm' % suspend_time,
            'powerd_dbus_suspend --delay=0 &',
        ]
        chained = ' && '.join(steps)
        dargs['suspend_cmd'] = ('(( %s )'
                                '< /dev/null >/dev/null 2>&1 &)' % chained)
    super(CrosHost, self).suspend(**dargs)
1504
1505
def upstart_status(self, service_name):
    """Check the status of an upstart init script.

    @param service_name: Service to look up.

    @returns True if the service is running, False otherwise.
    """
    # The pipeline's exit status is grep's, which is non-zero when
    # nothing matches. Without ignore_status the "not running" case
    # would raise from run() instead of returning False.
    output = self.run('status %s | grep start/running' % service_name,
                      ignore_status=True).stdout
    return output.strip() != ''
1515
1516
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. A pending crash-log collection blocks verification.
     2. All conditions tested by the parent version of this
        function.
     3. Sufficient space in /mnt/stateful_partition.
     4. Sufficient space in /mnt/stateful_partition/encrypted, when
        that path exists.
     5. The 'system-services' upstart job is running.
     6. update_engine answers a simple status request (skipped on
        factory images).
     7. python on the host can import cPickle.

    @raises error.AutoservCrashLogCollectRequired if crash-logs still
            need to be collected from this host.
    @raises error.AutoservError if system services are not running.
    """
    # Check if a job was crashed on this host.
    # If yes, avoid verification until crash-logs are collected.
    if self._need_crash_logs():
        raise error.AutoservCrashLogCollectRequired(
                'Need to collect crash-logs before verification')

    super(CrosHost, self).verify_software()

    config = global_config.global_config
    stateful_gb_required = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float, default=20.0)
    self.check_diskspace('/mnt/stateful_partition', stateful_gb_required)

    encrypted_stateful_path = '/mnt/stateful_partition/encrypted'
    # Not all targets build with encrypted stateful support.
    if self.path_exists(encrypted_stateful_path):
        encrypted_gb_required = config.get_config_value(
                'SERVER', 'gb_encrypted_diskspace_required', type=float,
                default=0.1)
        self.check_diskspace(encrypted_stateful_path,
                             encrypted_gb_required)

    if not self.upstart_status('system-services'):
        raise error.AutoservError('Chrome failed to reach login. '
                                  'System services not running.')

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001561
1562
def verify_hardware(self):
    """Log the hardware power state of a Chrome OS system.

    Nothing is enforced here; the following values are only logged:
     1. Battery level.
     2. Whether the power adapter is connected.
    """
    battery_percentage = self.get_battery_percentage()
    logging.info('Battery percentage: %s', battery_percentage)
    if self.is_ac_connected():
        adapter_state = 'has'
    else:
        adapter_state = 'does not have'
    logging.info('Device %s power adapter connected and charging.',
                 adapter_state)
1573
1574
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line as a string, without a target host.
    """
    template = ('/usr/bin/ssh -a -x %s %s %s'
                ' -o StrictHostKeyChecking=no'
                ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                ' -o Protocol=2 -l %s -p %d')
    substitutions = (self._ssh_verbosity_flag, self._ssh_options,
                     opts, user, port)
    return template % substitutions
Fang Deng96667ca2013-08-01 17:46:18 -07001615
1616
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    The tunnel is an ssh process started through the shell; it forwards
    local_port on this machine to localhost:port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)
    return tunnel_proc
1639
1640
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, if known. It is
                       recorded in the proxy map together with
                       command_name and the tunnel process.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop any older connection to this port before replacing its entry.
    self.rpc_disconnect(port)
    local_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, local_port)
    self._rpc_proxy_map[port] = (command_name, tunnel, remote_pid)
    return self._RPC_PROXY_URL % local_port
1676
1677
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10,
                   logfile='/dev/null'):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready. If the readiness call never succeeds, the
    connection is torn down via rpc_disconnect and the error
    propagates to the caller.
    NOTE(review): the exact exception type on timeout depends on
    retry.retry - confirm before relying on it.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'
    @param logfile Logfile to send output when running
        'command' argument.

    @return The XMLRPC client proxy.
    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    launch_cmd = '%s </dev/null >%s 2>&1 & echo $!' % (command, logfile)
    remote_pid = self.run(launch_cmd).stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        transient_errors = (socket.error,
                            xmlrpclib.ProtocolError,
                            httplib.BadStatusLine)

        @retry.retry(transient_errors,
                     timeout_min=timeout_seconds / 60.0,
                     delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
        def ready_test():
            """Invoke the readiness method named by ready_test_name."""
            getattr(proxy, ready_test_name)()

        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            # Runs on every exit path; only a failure tears down.
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001762
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001763
def syslog(self, message, tag='autotest'):
    """Log a message to syslog on the host via the `logger` command.

    Both values are placed inside double quotes on the shell command
    line as-is; no escaping is applied.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    logger_cmd = 'logger -t "%s" "%s"' % (tag, message)
    self.run(logger_cmd)
1772
1773
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a
    remote jsonrpc server, as the only clients of this code are factory
    tests, for which the goofy system manager is built in to the image
    and starts when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name is recorded, so rpc_disconnect will not try to
    # kill anything on the host for this port.
    rpc_url = self._setup_rpc(port, None)
    proxy = jsonrpclib.jsonrpc.ServerProxy(rpc_url)

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1804
1805
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.

    @raises error.TestError if the remote server is still running
            after self._RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    try:
        remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
    except KeyError:
        # Never connected through _setup_rpc; nothing to do.
        return

    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)
        if remote_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         remote_name)
            start_time = time.time()
            while (time.time() - start_time <
                   self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                still_running = self.run(
                        "pgrep -f '%s'" % remote_name,
                        ignore_status=True).stdout.split()
                if remote_pid not in still_running:
                    logging.info('Shut down RPC server.')
                    break
                time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
            else:
                # Loop ran out of time without seeing the pid vanish.
                raise error.TestError('Failed to shutdown RPC server %s' %
                                      remote_name)

    if tunnel_proc.poll() is not None:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    else:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001860
1861
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # Iterate over a snapshot of the ports, since rpc_disconnect removes
    # entries from the map as it goes.
    known_ports = list(self._rpc_proxy_map)
    for port in known_ports:
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001866
1867
def poor_mans_rpc(self, fun):
    """
    Build a shell script that calls a client utils function on the host.

    The script changes into the bin directory of the installed autotest
    dir and prints the result of utils.<fun> using python. This method
    only builds the script; it does not run it.

    @param fun function call expression in client utils namespace.
    @return the shell script, as a string.
    """
    autodir = autotest.Autotest.get_installed_autodir(self)
    pieces = [
        'cd %s/bin; ' % autodir,
        'python -c "import common; import utils;',
        'print utils.%s"' % fun,
    ]
    return ''.join(pieces)
1879
1880
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_exit = utils.ping(self.hostname, tries=1, deadline=1)
    # A ping exit value of 0 is treated as "host answered".
    responded = (ping_exit == 0)
    return not (status ^ responded)
1891
def _ping_wait_for_status(self, status, timeout):
    """Wait for the host to have a given status (UP or DOWN).

    Status is checked by polling.  Polling will not last longer
    than the number of seconds in `timeout`.  The polling
    interval will be long enough that only approximately
    _PING_WAIT_COUNT polling cycles will be executed, subject
    to a maximum interval of about one minute.

    @param status Waiting will stop immediately if `ping` of the
                  host returns this status.
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    # _ping_check_status() takes about 1 second, hence the
    # "- 1" in the formula below.
    sleep_secs = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if self._ping_check_status(status):
            return True
        if sleep_secs > 0:
            time.sleep(sleep_secs)

    # The last thing we did was sleep(sleep_secs), so it may
    # have been too long since the last `ping`.  Check one more
    # time, just to be sure.
    return self._ping_check_status(status)
1922
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001938
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001952
def test_wait_for_sleep(self, sleep_timeout=None):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @param sleep_timeout time limit in seconds to allow the host sleep.
                         Defaults to self.SLEEP_TIMEOUT when None.

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    timeout = self.SLEEP_TIMEOUT if sleep_timeout is None else sleep_timeout
    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' % timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001983
1984
def test_wait_for_resume(self, old_boot_id, resume_timeout=None):
    """Wait for the client to come back from low-power sleep mode.

    `old_boot_id` should be the value that `get_boot_id()`
    returned before the host entered sleep mode.  If the boot id
    read after the host is back up differs from it, the host
    rebooted rather than resumed and `TestFail` is raised.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.
    @param resume_timeout time limit in seconds to allow the host
                          up; `self.RESUME_TIMEOUT` is used when
                          this is None.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if resume_timeout is None:
        timeout = self.RESUME_TIMEOUT
    else:
        timeout = resume_timeout

    if not self.wait_up(timeout=timeout):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                timeout)

    # The host is reachable again; a changed boot id means it
    # went through a reboot instead of a resume.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, current_boot_id))
2019
2020
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The "has shut down" check only observes that the host stopped
    answering ping, so it can't tell a powered-off system from
    one that is merely asleep.  To confirm the unit really was
    down, force a boot and then call test_wait_for_boot().

    A test is expected to use this function in a sequence like
    the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time (`self.SHUTDOWN_TIMEOUT`).
    """
    timeout = self.SHUTDOWN_TIMEOUT
    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07002046
2047
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value that `get_boot_id()`
    returned prior to shutting down.  `TestFail` is raised if the
    boot id is unchanged once the host is back up.  The boot id
    comparison is skipped when `old_boot_id` is not given (or is
    otherwise falsy).

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time (`self.REBOOT_TIMEOUT`).
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    timeout = self.REBOOT_TIMEOUT
    if not self.wait_up(timeout=timeout):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                timeout)

    # Only query the host's boot id when there is one to compare.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07002077
2078
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not follow the defined naming
            format for RPM powered DUTs in the lab, or if it falls in
            one of the lab/row/rack ranges excluded below.  Otherwise
            the regular expression MatchObject for the hostname.
    """
    match = re.match(CrosHost._RPM_HOSTNAME_REGEX, hostname)
    if not match:
        return None

    try:
        lab, row, rack = [int(match.group(n)) for n in (1, 3, 4)]
    except (TypeError, ValueError):
        # A group that can't be read as a number can't fall in an
        # excluded range; report the host as supported.
        return match

    # TODO(fdeng): temporarily disable support for duts
    # behind hydra2 in chromeos2, remove once
    # b/17612645 is fixed.
    behind_hydra2 = lab == 2 and 1 <= row <= 5 and 1 <= rack <= 7
    # TODO(fdeng): disable support for duts behind hydra3
    # for chromeos4, remove once b/15410667 is fixed
    behind_hydra3 = lab == 4 and (rack == 0 or row == 13)
    if behind_hydra2 or behind_hydra3:
        return None
    return match
Simran Basid5e5e272012-09-24 15:23:59 -07002108
2109
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return The (truthy) regular expression match object produced by
            `CrosHost.check_for_rpm_support()` for this host's name,
            or None when that check rejects the name.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07002117
2118
def _set_power(self, state, power_method):
    """Sets the power to the host via RPM, Servo or manual.

    @param state Power state to set on the DUT: 'ON' or 'OFF', in
                 any letter case.
    @param power_method Specifies which method of power control to
                        use.  `self.POWER_CONTROL_SERVO` and
                        `self.POWER_CONTROL_MANUAL` select servo and
                        manual control; any other value is handled
                        as RPM.

    @exception TestError `state` is not one of the accepted states.
    @exception TestFail RPM control was selected, but `has_power()`
                        reports no RPM for this host.

    """
    valid_states = ['ON', 'OFF']
    requested = state.upper()
    if requested not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; the sleep gives a person time
        # to change the AC power by hand.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Everything else is treated as RPM control.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, requested, timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07002149
2150
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Thin wrapper that asks `_set_power()` for the 'OFF' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07002160
2161
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Thin wrapper that asks `_set_power()` for the 'ON' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
2171
2172
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is an explicit
    `power_off()`, a pause of `self._POWER_CYCLE_TIMEOUT` seconds,
    and a `power_on()`.  For any other method a single 'CYCLE'
    request is sent through `rpm_client`.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07002188
2189
def get_platform(self):
    """Determine the correct platform label for this host.

    The label is derived from the firmware id reported by
    crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; keep only the part
    # before the first dot as the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.partition('.')[0].lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform_id.replace('google_', '')
2203
2204
def get_architecture(self):
    """Determine the correct architecture label for this host.

    @returns a string representing this host's architecture, as
             reported by crossystem's `arch()`.
    """
    host_info = utils.Crossystem(self)
    host_info.init()
    architecture = host_info.arch()
    return architecture
2213
2214
def get_chrome_version(self):
    """Gets the Chrome version number and milestone as strings.

    Runs `client_constants.CHROME_VERSION_COMMAND` on the host and
    hands its output to `utils.parse_chrome_version()`.

    @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
            current Chrome version number as a string (in the form "W.X.Y.Z")
            and "milestone" is the first component of the version number
            (the "W" from "W.X.Y.Z"). If the version number cannot be parsed
            in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
            of "chrome --version" and the milestone will be the empty string.

    """
    result = self.run(client_constants.CHROME_VERSION_COMMAND)
    return utils.parse_chrome_version(result.stdout)
2230
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in the
    host's /etc/lsb-release.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    pieces = release_info['CHROMEOS_RELEASE_BOARD'].split('-')
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}. The board
    # name may also begin with 'x86-' which we need to keep, so such
    # names retain their first two dash-separated pieces.
    keep = 2 if 'x86' in release_info['CHROMEOS_RELEASE_BOARD'] else 1
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    return board_format_string % '-'.join(pieces[:keep])
Simran Basic6f1f7a2012-10-16 10:47:46 -07002247
2248
@label_decorator('board_freq_mem')
def get_board_with_frequency_and_memory(self):
    """
    Determines the board name with frequency and memory.

    The value is obtained by running
    'get_board_with_frequency_and_memory()' on the host through
    `poor_mans_rpc()`.

    @returns a more detailed string representing the board. Examples are
    butterfly_1.1GHz_2GB, link_1.8GHz_4GB, x86-zgb_1.7GHz_2GB
    """
    rpc_cmd = self.poor_mans_rpc('get_board_with_frequency_and_memory()')
    rpc_output = self.run(rpc_cmd).stdout
    return 'board_freq_mem:%s' % str.strip(rpc_output)
2260
2261
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a lightsensor.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    wanted_files = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, wanted_files)
    try:
        # Run the search cmd following the symlinks. Stderr_tee is set to
        # None as there can be a symlink loop, but the command will still
        # execute correctly with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the possible
        # lightsensor files existed.
        return None
    return 'lightsensor'
2281
2282
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
2298
2299
@label_decorator('gpu_family')
def get_gpu_family(self):
    """
    Determine GPU family.

    The value is obtained by running 'get_gpu_family()' on the host
    through `poor_mans_rpc()`.

    @returns a string representing the gpu family. Examples are mali, tegra,
    pinetrail, sandybridge, ivybridge, haswell and baytrail.
    """
    rpc_cmd = self.poor_mans_rpc('get_gpu_family()')
    rpc_output = self.run(rpc_cmd).stdout
    return 'gpu_family:%s' % str.strip(rpc_output)
2310
2311
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the correct graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    # A host counts as ARM when 'arm' appears anywhere in its
    # lowercased `uname -a` output.
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
2327
2328
@label_decorator('ec')
def get_ec(self):
    """
    Determine the type of EC on this host.

    @returns a string representing this host's embedded controller type.
    At present, it only returns "ec:cros", for Chrome OS ECs. Other types
    of EC (or none) don't return any strings, since no tests depend on
    those.
    """
    cmd = 'mosys ec info'
    # The output should look like these, so that the last field should
    # match our EC version scheme:
    #
    #   stm | stm32f100 | snow_v1.3.139-375eb9f
    #   ti | Unknown-10de | peppy_v1.5.114-5d52788
    #
    # Non-Chrome OS ECs will look like these:
    #
    #   ENE | KB932 | 00BE107A00
    #   ite | it8518 | 3.08
    #
    # And some systems don't have ECs at all (Lumpy, for example).
    regexp = r'^.*\|\s*(\S+_v\d+\.\d+\.\d+-[0-9a-f]+)\s*$'

    ecinfo = self.run(command=cmd, ignore_status=True)
    if ecinfo.exit_status != 0:
        # No EC present
        logging.info("%s exited with status %d", cmd, ecinfo.exit_status)
        return None

    version_match = re.search(regexp, ecinfo.stdout)
    if not version_match:
        # Has an EC, but it's not a Chrome OS EC
        logging.info("%s got: %s", cmd, ecinfo.stdout)
        return None

    logging.info("EC version is %s", version_match.group(1))
    return 'ec:cros'
2366
2367
@label_decorator('accels')
def get_accels(self):
    """
    Determine the type of accelerometers on this host.

    @returns a string representing this host's accelerometer type.
    At present, it only returns "accel:cros-ec", for accelerometers
    attached to a Chrome OS EC, or None, if no accelerometers.
    """
    # Each probe must exit with status 0 for the host to qualify:
    # first that ectool exists, then that the EC supports the
    # motionsense command.
    probes = (
        ('which ectool',
         "No ectool cmd found, assuming no EC accelerometers"),
        ('ectool motionsense',
         "EC does not support motionsense command "
         "assuming no EC accelerometers"),
    )
    for probe_cmd, failure_msg in probes:
        if self.run(probe_cmd, ignore_status=True).exit_status:
            logging.info(failure_msg)
            return None

    # Check that EC motion sensors are active; only the first line
    # of the output is examined.
    first_line = self.run(
        'ectool motionsense active').stdout.split('\n')[0]
    if first_line == "0":
        logging.info("Motion sense inactive, assuming no EC accelerometers")
        return None

    logging.info("EC accelerometers found")
    return 'accel:cros-ec'
2398
2399
@label_decorator('chameleon')
def has_chameleon(self):
    """Determine if a Chameleon is connected to this host.

    @returns a list containing two strings ('chameleon' and
             'chameleon:' + label, e.g. 'chameleon:hdmi') if this host
             has a Chameleon or None if it has not.
    """
    if not self._chameleon_host:
        return None
    specific_label = 'chameleon:' + self.chameleon.get_label()
    return ['chameleon', specific_label]
2412
2413
@label_decorator('power_supply')
def get_power_supply(self):
    """
    Determine what type of power supply the host has

    @returns a string representing this host's power supply.
             'power:battery' when the device has a battery intended for
                    extended use
             'power:AC_primary' when the device has a battery not intended
                    for extended use (for moving the machine, etc)
             'power:AC_only' when the device has no battery at all.
             None when `mosys psu type` reports 'unknown'.
    """
    psu = self.run(command='mosys psu type', ignore_status=True)
    if psu.exit_status:
        # The psu command for mosys is not included for all platforms. The
        # assumption is that the device will have a battery if the command
        # is not found.
        return 'power:battery'

    psu_type = psu.stdout.strip()
    return None if psu_type == 'unknown' else 'power:%s' % psu_type
2438
2439
@label_decorator('storage')
def get_storage(self):
    """
    Determine the type of boot device for this host.

    Determine if the internal device is SCSI or dw_mmc device.
    Then check that it is SSD or HDD or eMMC or something else.

    @returns a string representing this host's internal device type.
             'storage:ssd' when internal device is solid state drive
             'storage:hdd' when internal device is hard disk drive
             'storage:mmc' when internal device is mmc drive
             None          When internal device is something else or
                           when we are unable to determine the type
    """
    # The output should be /dev/mmcblk* for SD/eMMC or /dev/sd* for scsi
    rootdev_cmd = ' '.join(['. /usr/sbin/write_gpt.sh;',
                            '. /usr/share/misc/chromeos-common.sh;',
                            'load_base_vars;',
                            'get_fixed_dst_drive'])
    rootdev = self.run(command=rootdev_cmd, ignore_status=True)
    if rootdev.exit_status:
        logging.info("Fail to run %s", rootdev_cmd)
        return None
    rootdev_str = rootdev.stdout.strip()

    if not rootdev_str:
        return None

    rootdev_base = os.path.basename(rootdev_str)

    mmc_pattern = '/dev/mmcblk[0-9]'
    if re.match(mmc_pattern, rootdev_str):
        # Use type to determine if the internal device is eMMC or something
        # else. We can assume that MMC is always an internal device.
        type_cmd = 'cat /sys/block/%s/device/type' % rootdev_base
        # Named dev_type so the builtin `type` is not shadowed.
        dev_type = self.run(command=type_cmd, ignore_status=True)
        if dev_type.exit_status:
            logging.info("Fail to run %s", type_cmd)
            return None
        type_str = dev_type.stdout.strip()

        if type_str == 'MMC':
            return 'storage:mmc'

    scsi_pattern = '/dev/sd[a-z]+'
    # Match against the stripped device path, exactly like the MMC
    # check above: re.match() anchors at the start of the string, so
    # leading whitespace in the raw stdout would defeat the match.
    if re.match(scsi_pattern, rootdev_str):
        # Read symlink for /sys/block/sd* to determine if the internal
        # device is connected via ata or usb.
        link_cmd = 'readlink /sys/block/%s' % rootdev_base
        link = self.run(command=link_cmd, ignore_status=True)
        if link.exit_status:
            logging.info("Fail to run %s", link_cmd)
            return None
        link_str = link.stdout.strip()
        if 'usb' in link_str:
            return None

        # Read rotation to determine if the internal device is ssd or hdd.
        rotate_cmd = 'cat /sys/block/%s/queue/rotational' % rootdev_base
        rotate = self.run(command=rotate_cmd, ignore_status=True)
        if rotate.exit_status:
            logging.info("Fail to run %s", rotate_cmd)
            return None
        rotate_str = rotate.stdout.strip()

        # Any value other than '0' or '1' yields None.
        rotate_dict = {'0': 'storage:ssd', '1': 'storage:hdd'}
        return rotate_dict.get(rotate_str)

    # All other internal device / error case will always fall here
    return None
2512
2513
@label_decorator('servo')
def get_servo(self):
    """Determine if the host has a servo attached.

    If the host has a working servo attached, it should have a servo label.
    The decision is based solely on whether `self._servo_host` is set
    to a truthy value.

    @return: string 'servo' if the host has servo attached. Otherwise,
             returns None.
    """
    if self._servo_host:
        return 'servo'
    return None
2524
2525
@label_decorator('video_labels')
def get_video_labels(self):
    """Run /usr/local/bin/avtest_label_detect to get a list of video labels.

    Sample output of avtest_label_detect:
    Detected label: hw_video_acc_vp8
    Detected label: webcam

    @return: A list of labels detected by tool avtest_label_detect; an
             empty list if running the commands raises AutoservRunError.
    """
    try:
        # TODO (sbasi) crbug.com/391081 - Remove once the proper fix has
        # landed and supporting images older than the fix is no longer
        # necessary.
        # Change back to VT1 so avtest_label_detect does not get stuck.
        self.run('chvt 1')
        result = self.run('/usr/local/bin/avtest_label_detect').stdout
        # Raw string: '\w' in a plain literal is an invalid string
        # escape.  re.M makes ^/$ match at every output line.
        return re.findall(r'^Detected label: (\w+)$', result, re.M)
    except error.AutoservRunError:
        # The tool is not installed.
        return []
2547
2548
@label_decorator('video_glitch_detection')
def is_video_glitch_detection_supported(self):
    """Determine if a board under test is supported for video glitch
    detection tests.

    A board is supported when its name (the board label without
    `ds_constants.BOARD_PREFIX`) is a section name in
    client/cros/video/device_spec.conf.

    @return: 'video_glitch_detection' if board is supported, None
             otherwise, including when the conf file cannot be parsed.
    """
    parser = ConfigParser.SafeConfigParser()
    filename = os.path.join(
        common.autotest_dir, 'client/cros/video/device_spec.conf')

    dut = self.get_board().replace(ds_constants.BOARD_PREFIX, '')

    try:
        parser.read(filename)
        supported_boards = parser.sections()

        return 'video_glitch_detection' if dut in supported_boards else None

    except ConfigParser.Error:
        # Something went wrong while parsing the conf file.
        # ConfigParser.Error is the module's base exception class; the
        # module has no lowercase `error` attribute, so naming that
        # here would raise AttributeError instead of handling the
        # parse failure.
        return None
2571
@label_decorator('touch_labels')
def get_touch(self):
    """
    Determine whether board under test has a touchpad or touchscreen.

    Each device type is reported when inputcontrol prints any names
    for it.

    @return: A list of some combination of 'touchscreen' and 'touchpad',
             depending on what is present on the device.
    """
    input_cmd = '/opt/google/input/inputcontrol --names -t %s'
    return [kind for kind in ('touchpad', 'touchscreen')
            if self.run(input_cmd % kind).stdout]
2586
2587
mussa584b4462014-06-20 15:13:28 -07002588
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions
    in `self._LABEL_FUNCTIONS`.

    A label function may return a single string, a list of strings,
    or a falsy value for "no label".  Results of any other type are
    ignored.  A label function that raises is logged and skipped so
    that one failing detector does not lose the remaining labels.

    @return A flat list of label strings.
    """
    labels = []
    for label_function in self._LABEL_FUNCTIONS:
        try:
            label = label_function(self)
        except Exception as e:
            # Deliberately broad: label detection is best-effort.
            logging.error('Label function %s failed; ignoring it.',
                          label_function.__name__)
            logging.exception(e)
            continue
        if not label:
            continue
        # isinstance() rather than an exact type() comparison, so
        # that str/list subclasses are not silently dropped.
        if isinstance(label, str):
            labels.append(label)
        elif isinstance(label, list):
            labels.extend(label)
    return labels
Dan Shi85276d42014-04-08 22:11:45 -07002610
2611
def is_boot_from_usb(self):
    """Check if DUT is boot from USB.

    The root device reported by `rootdev -s -d` is looked up under
    /sys/block, and its `removable` flag decides the answer.

    @return: True if DUT is boot from usb.
    """
    root_device = self.run('rootdev -s -d').stdout.strip()
    block_name = os.path.basename(root_device)
    removable_flag = self.run(
        'cat /sys/block/%s/removable' % block_name).stdout.strip()
    return int(removable_flag) == 1
Helen Zhang17dae2b2014-11-11 09:25:52 -08002621
2622
def read_from_meminfo(self, key):
    """Return the memory info from /proc/meminfo

    Greps /proc/meminfo on the host for `key` and extracts the
    first run of digits from the output.

    @param key: meminfo requested

    @return the memory value as an int

    """
    grep_cmd = 'grep %s /proc/meminfo' % key
    meminfo = self.run(grep_cmd).stdout.strip()
    logging.debug('%s', meminfo)
    first_number = re.search(r'\d+', meminfo)
    return int(first_number.group(0))