blob: ba0f847950fd255b9d1da54a933e8102e226e847 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080028from autotest_lib.server.cros.servo import servo
Fang Deng96667ca2013-08-01 17:46:18 -070029from autotest_lib.server.hosts import abstract_ssh
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
beeps32a63082013-08-22 14:02:29 -070034try:
35 import jsonrpclib
36except ImportError:
37 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070038
Fang Dengd1c2b732013-08-20 12:59:46 -070039
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080040def _make_servo_hostname(hostname):
41 host_parts = hostname.split('.')
42 host_parts[0] = host_parts[0] + '-servo'
43 return '.'.join(host_parts)
44
45
class FactoryImageCheckerException(error.AutoservError):
    """Raised to signal that the image being checked is a factory image."""
49
50
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    This is best-effort: a failure to construct the Servo object is
    logged and swallowed rather than propagated.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or None if
            the servo host is not in the lab zone or the Servo
            object could not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Catch Exception rather than using a bare `except:` so
        # that KeyboardInterrupt and SystemExit still propagate,
        # and leave a trace of the failure in the debug log.
        logging.debug('Ignoring failure to connect to servo host %s',
                      servo_host, exc_info=True)
        return None
73
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080074
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a label detection function.

    The returned decorator appends the decorated function to
    `label_function_list` and, when both a `label` and a `label_list`
    were supplied, also appends `label` to `label_list`.  The decorated
    function itself is returned unchanged.

    @param label_function_list: List of label detecting functions to add
                                decorated function to.
    @param label_list: List of detectable labels to add detectable labels
                       to. (Default: None)
    @param label: Label string that is detectable by this detection
                  function. (Default: None)
    @return the registering decorator.
    """
    def register(detector):
        """
        @param detector: The function to be added as a detector.
        @return `detector`, unmodified.
        """
        label_function_list.append(detector)
        if label_list is None or not label:
            return detector
        label_list.append(label)
        return detector

    return register
93
94
Fang Deng0ca40e22013-08-27 17:47:44 -070095class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070096 """Chromium OS specific subclass of Host."""
97
98 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050099 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700100
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800101 # Time to wait for new kernel to be marked successful after
102 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700103 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700104
Richard Barnette03a0c132012-11-05 12:40:35 -0800105 # Timeout values (in seconds) associated with various Chrome OS
106 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700107 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800108 # In general, a good rule of thumb is that the timeout can be up
109 # to twice the typical measured value on the slowest platform.
110 # The times here have not necessarily been empirically tested to
111 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700112 #
113 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800114 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
115 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700116 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800117 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800118 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700119 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800120 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800121 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800122 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700123 # REBOOT_TIMEOUT: How long to wait for a reboot.
Richard Barnette03a0c132012-11-05 12:40:35 -0800124 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700125
126 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800127 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700128 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700129 USB_BOOT_TIMEOUT = 150
Chris Sosab76e0ee2013-05-22 16:55:41 -0700130
131 # We have a long timeout to ensure we don't flakily fail due to other
132 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
133 REBOOT_TIMEOUT = 300
134
Richard Barnette03a0c132012-11-05 12:40:35 -0800135 _INSTALL_TIMEOUT = 240
136
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800137 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
138 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
139 _USB_POWER_TIMEOUT = 5
140 _POWER_CYCLE_TIMEOUT = 10
141
beeps32a63082013-08-22 14:02:29 -0700142 _RPC_PROXY_URL = 'http://localhost:%d'
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800143
Richard Barnette82c35912012-11-20 10:09:10 -0800144 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
145 'rpm_recovery_boards', type=str).split(',')
146
147 _MAX_POWER_CYCLE_ATTEMPTS = 6
148 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
149 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
150 'host[0-9]+')
151 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
152 'in_illuminance0_raw',
153 'illuminance0_input']
154 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
155 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800156 _DETECTABLE_LABELS = []
157 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
158 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700159
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800160 # Constants used in ping_wait_up() and ping_wait_down().
161 #
162 # _PING_WAIT_COUNT is the approximate number of polling
163 # cycles to use when waiting for a host state change.
164 #
165 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
166 # for arguments to the internal _ping_wait_for_status()
167 # method.
168 _PING_WAIT_COUNT = 40
169 _PING_STATUS_DOWN = False
170 _PING_STATUS_UP = True
171
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800172 # Allowed values for the power_method argument.
173
174 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
175 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
176 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
177 POWER_CONTROL_RPM = 'RPM'
178 POWER_CONTROL_SERVO = 'servoj10'
179 POWER_CONTROL_MANUAL = 'manual'
180
181 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
182 POWER_CONTROL_SERVO,
183 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800184
Simran Basi5e6339a2013-03-21 11:34:32 -0700185 _RPM_OUTLET_CHANGED = 'outlet_changed'
186
beeps687243d2013-07-18 15:29:27 -0700187
J. Richard Barnette964fba02012-10-24 17:34:29 -0700188 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800189 def get_servo_arguments(args_dict):
190 """Extract servo options from `args_dict` and return the result.
191
192 Take the provided dictionary of argument options and return
193 a subset that represent standard arguments needed to
194 construct a servo object for a host. The intent is to
195 provide standard argument processing from run_remote_tests
196 for tests that require a servo to operate.
197
198 Recommended usage:
199 ~~~~~~~~
200 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700201 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800202 host = hosts.create_host(machine, servo_args=servo_args)
203 ~~~~~~~~
204
205 @param args_dict Dictionary from which to extract the servo
206 arguments.
207 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700208 servo_args = {}
209 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800210 if arg in args_dict:
211 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700212 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700213
J. Richard Barnette964fba02012-10-24 17:34:29 -0700214
Fang Dengd1c2b732013-08-20 12:59:46 -0700215 def _initialize(self, hostname, servo_args=None, ssh_verbosity_flag='',
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700216 ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700217 *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700218 """Initialize superclasses, and |self.servo|.
219
220 For creating the host servo object, there are three
221 possibilities: First, if the host is a lab system known to
222 have a servo board, we connect to that servo unconditionally.
223 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700224 servo features for testing, it will pass settings for
225 `servo_host`, `servo_port`, or both. If neither of these
226 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700227
228 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700229 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700230 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700231 # self.env is a dictionary of environment variable settings
232 # to be exported for commands run on the host.
233 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
234 # errors that might happen.
235 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700236 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700237 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700238 self._ssh_options = ssh_options
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800239 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700240 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700241 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700242
243
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500244 def get_repair_image_name(self):
245 """Generate a image_name from variables in the global config.
246
247 @returns a str of $board-version/$BUILD.
248
249 """
250 stable_version = global_config.global_config.get_config_value(
251 'CROS', 'stable_cros_version')
252 build_pattern = global_config.global_config.get_config_value(
253 'CROS', 'stable_build_pattern')
254 board = self._get_board_from_afe()
255 if board is None:
256 raise error.AutoservError('DUT has no board attribute, '
257 'cannot be repaired.')
258 return build_pattern % (board, stable_version)
259
260
Scott Zawalski62bacae2013-03-05 10:40:32 -0500261 def _host_in_AFE(self):
262 """Check if the host is an object the AFE knows.
263
264 @returns the host object.
265 """
266 return self._AFE.get_hosts(hostname=self.hostname)
267
268
Chris Sosab76e0ee2013-05-22 16:55:41 -0700269 def lookup_job_repo_url(self):
270 """Looks up the job_repo_url for the host.
271
272 @returns job_repo_url from AFE or None if not found.
273
274 @raises KeyError if the host does not have a job_repo_url
275 """
276 if not self._host_in_AFE():
277 return None
278
279 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700280 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
281 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700282
283
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500284 def clear_cros_version_labels_and_job_repo_url(self):
285 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500286 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400287 return
288
Scott Zawalski62bacae2013-03-05 10:40:32 -0500289 host_list = [self.hostname]
290 labels = self._AFE.get_labels(
291 name__startswith=ds_constants.VERSION_PREFIX,
292 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800293
Scott Zawalski62bacae2013-03-05 10:40:32 -0500294 for label in labels:
295 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500296
beepscb6f1e22013-06-28 19:14:10 -0700297 self.update_job_repo_url(None, None)
298
299
300 def update_job_repo_url(self, devserver_url, image_name):
301 """
302 Updates the job_repo_url host attribute and asserts it's value.
303
304 @param devserver_url: The devserver to use in the job_repo_url.
305 @param image_name: The name of the image to use in the job_repo_url.
306
307 @raises AutoservError: If we failed to update the job_repo_url.
308 """
309 repo_url = None
310 if devserver_url and image_name:
311 repo_url = tools.get_package_url(devserver_url, image_name)
312 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500313 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700314 if self.lookup_job_repo_url() != repo_url:
315 raise error.AutoservError('Failed to update job_repo_url with %s, '
316 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500317
318
Dan Shie9309262013-06-19 22:50:21 -0700319 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400320 """Add cros_version labels and host attribute job_repo_url.
321
322 @param image_name: The name of the image e.g.
323 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700324
Scott Zawalskieadbf702013-03-14 09:23:06 -0400325 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500326 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400327 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500328
Scott Zawalskieadbf702013-03-14 09:23:06 -0400329 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700330 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500331
332 labels = self._AFE.get_labels(name=cros_label)
333 if labels:
334 label = labels[0]
335 else:
336 label = self._AFE.create_label(name=cros_label)
337
338 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700339 self.update_job_repo_url(devserver_url, image_name)
340
341
beepsdae65fd2013-07-26 16:24:41 -0700342 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700343 """
344 Make sure job_repo_url of this host is valid.
345
joychen03eaad92013-06-26 09:55:21 -0700346 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700347 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
348 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
349 download and extract it. If the devserver embedded in the url is
350 unresponsive, update the job_repo_url of the host after staging it on
351 another devserver.
352
353 @param job_repo_url: A url pointing to the devserver where the autotest
354 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700355 @param tag: The tag from the server job, in the format
356 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700357
358 @raises DevServerException: If we could not resolve a devserver.
359 @raises AutoservError: If we're unable to save the new job_repo_url as
360 a result of choosing a new devserver because the old one failed to
361 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700362 @raises urllib2.URLError: If the devserver embedded in job_repo_url
363 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700364 """
365 job_repo_url = self.lookup_job_repo_url()
366 if not job_repo_url:
367 logging.warning('No job repo url set on host %s', self.hostname)
368 return
369
370 logging.info('Verifying job repo url %s', job_repo_url)
371 devserver_url, image_name = tools.get_devserver_build_from_package_url(
372 job_repo_url)
373
beeps0c865032013-07-30 11:37:06 -0700374 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700375
376 logging.info('Staging autotest artifacts for %s on devserver %s',
377 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700378
379 start_time = time.time()
beepscb6f1e22013-06-28 19:14:10 -0700380 ds.stage_artifacts(image_name, ['autotest'])
beeps687243d2013-07-18 15:29:27 -0700381 stage_time = time.time() - start_time
382
383 # Record how much of the verification time comes from a devserver
384 # restage. If we're doing things right we should not see multiple
385 # devservers for a given board/build/branch path.
386 try:
387 board, build_type, branch = site_utils.ParseBuildName(
388 image_name)[:3]
389 except site_utils.ParseBuildNameException as e:
390 pass
391 else:
beeps0c865032013-07-30 11:37:06 -0700392 devserver = devserver_url[
393 devserver_url.find('/')+2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700394 stats_key = {
395 'board': board,
396 'build_type': build_type,
397 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700398 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700399 }
400 stats.Gauge('verify_job_repo_url').send(
401 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
402 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700403
Scott Zawalskieadbf702013-03-14 09:23:06 -0400404
Dan Shi0f466e82013-02-22 15:44:58 -0800405 def _try_stateful_update(self, update_url, force_update, updater):
406 """Try to use stateful update to initialize DUT.
407
408 When DUT is already running the same version that machine_install
409 tries to install, stateful update is a much faster way to clean up
410 the DUT for testing, compared to a full reimage. It is implemeted
411 by calling autoupdater.run_update, but skipping updating root, as
412 updating the kernel is time consuming and not necessary.
413
414 @param update_url: url of the image.
415 @param force_update: Set to True to update the image even if the DUT
416 is running the same version.
417 @param updater: ChromiumOSUpdater instance used to update the DUT.
418 @returns: True if the DUT was updated with stateful update.
419
420 """
421 if not updater.check_version():
422 return False
423 if not force_update:
424 logging.info('Canceling stateful update because the new and '
425 'old versions are the same.')
426 return False
427 # Following folders should be rebuilt after stateful update.
428 # A test file is used to confirm each folder gets rebuilt after
429 # the stateful update.
430 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
431 test_file = '.test_file_to_be_deleted'
432 for folder in folders_to_check:
433 touch_path = os.path.join(folder, test_file)
434 self.run('touch %s' % touch_path)
435
436 if not updater.run_update(force_update=True, update_root=False):
437 return False
438
439 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700440 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800441 check_file_cmd = 'test -f %s; echo $?'
442 for folder in folders_to_check:
443 test_file_path = os.path.join(folder, test_file)
444 result = self.run(check_file_cmd % test_file_path,
445 ignore_status=True)
446 if result.exit_status == 1:
447 return False
448 return True
449
450
J. Richard Barnette7275b612013-06-04 18:13:11 -0700451 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800452 """After the DUT is updated, confirm machine_install succeeded.
453
454 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700455 @param expected_kernel: kernel expected to be active after reboot,
456 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800457
458 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700459 # Touch the lab machine file to leave a marker that
460 # distinguishes this image from other test images.
461 # Afterwards, we must re-run the autoreboot script because
462 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800463 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800464 self.run('start autoreboot')
465
J. Richard Barnette7275b612013-06-04 18:13:11 -0700466 # Figure out the newly active kernel.
467 active_kernel, _ = updater.get_kernel_state()
468
469 # Check for rollback due to a bad build.
470 if expected_kernel and active_kernel != expected_kernel:
471 # Print out some information to make it easier to debug
472 # the rollback.
Dan Shi0f466e82013-02-22 15:44:58 -0800473 logging.debug('Dumping partition table.')
Dan Shi346725f2013-03-20 15:22:38 -0700474 self.run('cgpt show $(rootdev -s -d)')
Dan Shi0f466e82013-02-22 15:44:58 -0800475 logging.debug('Dumping crossystem for firmware debugging.')
Dan Shi346725f2013-03-20 15:22:38 -0700476 self.run('crossystem --all')
Dan Shi0f466e82013-02-22 15:44:58 -0800477 raise autoupdater.ChromiumOSError(
J. Richard Barnette7275b612013-06-04 18:13:11 -0700478 'Build %s failed to boot on %s; system rolled back '
479 'to previous build' % (updater.update_version,
480 self.hostname))
Dan Shi0f466e82013-02-22 15:44:58 -0800481
J. Richard Barnette7275b612013-06-04 18:13:11 -0700482 # Check that we've got the build we meant to install.
483 if not updater.check_version_to_confirm_install():
484 raise autoupdater.ChromiumOSError(
485 'Failed to update %s to build %s; found build '
486 '%s instead' % (self.hostname,
487 updater.update_version,
488 updater.get_build_id()))
Scott Zawalski62bacae2013-03-05 10:40:32 -0500489
J. Richard Barnette7275b612013-06-04 18:13:11 -0700490 # Make sure chromeos-setgoodkernel runs.
491 try:
Dan Shi0f466e82013-02-22 15:44:58 -0800492 utils.poll_for_condition(
J. Richard Barnette7275b612013-06-04 18:13:11 -0700493 lambda: (updater.get_kernel_tries(active_kernel) == 0
494 and updater.get_kernel_success(active_kernel)),
495 exception=autoupdater.ChromiumOSError(),
Dan Shi0f466e82013-02-22 15:44:58 -0800496 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
J. Richard Barnette7275b612013-06-04 18:13:11 -0700497 except autoupdater.ChromiumOSError as e:
498 services_status = self.run('status system-services').stdout
499 if services_status != 'system-services start/running\n':
500 event = ('Chrome failed to reach login screen')
501 else:
502 event = ('update-engine failed to call '
503 'chromeos-setgoodkernel')
504 raise autoupdater.ChromiumOSError(
505 'After update and reboot, %s '
506 'within %d seconds' % (event,
507 self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800508
509
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700510 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400511 """Stage a build on a devserver and return the update_url.
512
513 @param image_name: a name like lumpy-release/R27-3837.0.0
514 @returns an update URL like:
515 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
516 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700517 if not image_name:
518 image_name = self.get_repair_image_name()
519 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400520 devserver = dev_server.ImageServer.resolve(image_name)
521 devserver.trigger_download(image_name, synchronous=False)
522 return tools.image_url_pattern() % (devserver.url(), image_name)
523
524
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700525 def stage_image_for_servo(self, image_name=None):
526 """Stage a build on a devserver and return the update_url.
527
528 @param image_name: a name like lumpy-release/R27-3837.0.0
529 @returns an update URL like:
530 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
531 """
532 if not image_name:
533 image_name = self.get_repair_image_name()
534 logging.info('Staging build for servo install: %s', image_name)
535 devserver = dev_server.ImageServer.resolve(image_name)
536 devserver.stage_artifacts(image_name, ['test_image'])
537 return devserver.get_test_image_url(image_name)
538
539
beepse539be02013-07-31 21:57:39 -0700540 def stage_factory_image_for_servo(self, image_name):
541 """Stage a build on a devserver and return the update_url.
542
543 @param image_name: a name like <baord>/4262.204.0
beeps12c0a3c2013-09-03 11:58:27 -0700544
beepse539be02013-07-31 21:57:39 -0700545 @return: An update URL, eg:
546 http://<devserver>/static/canary-channel/\
547 <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
beeps12c0a3c2013-09-03 11:58:27 -0700548
549 @raises: ValueError if the factory artifact name is missing from
550 the config.
551
beepse539be02013-07-31 21:57:39 -0700552 """
553 if not image_name:
554 logging.error('Need an image_name to stage a factory image.')
555 return
556
beeps12c0a3c2013-09-03 11:58:27 -0700557 factory_artifact = global_config.global_config.get_config_value(
558 'CROS', 'factory_artifact', type=str, default='')
559 if not factory_artifact:
560 raise ValueError('Cannot retrieve the factory artifact name from '
561 'autotest config, and hence cannot stage factory '
562 'artifacts.')
563
beepse539be02013-07-31 21:57:39 -0700564 logging.info('Staging build for servo install: %s', image_name)
565 devserver = dev_server.ImageServer.resolve(image_name)
566 devserver.stage_artifacts(
567 image_name,
beeps12c0a3c2013-09-03 11:58:27 -0700568 [factory_artifact],
569 archive_url=None)
beepse539be02013-07-31 21:57:39 -0700570
571 return tools.factory_image_url_pattern() % (devserver.url(), image_name)
572
573
Chris Sosaa3ac2152012-05-23 22:23:13 -0700574 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500575 local_devserver=False, repair=False):
576 """Install the DUT.
577
Dan Shi0f466e82013-02-22 15:44:58 -0800578 Use stateful update if the DUT is already running the same build.
579 Stateful update does not update kernel and tends to run much faster
580 than a full reimage. If the DUT is running a different build, or it
581 failed to do a stateful update, full update, including kernel update,
582 will be applied to the DUT.
583
Scott Zawalskieadbf702013-03-14 09:23:06 -0400584 Once a host enters machine_install its cros_version label will be
585 removed as well as its host attribute job_repo_url (used for
586 package install).
587
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500588 @param update_url: The url to use for the update
589 pattern: http://$devserver:###/update/$build
590 If update_url is None and repair is True we will install the
591 stable image listed in global_config under
592 CROS.stable_cros_version.
593 @param force_update: Force an update even if the version installed
594 is the same. Default:False
595 @param local_devserver: Used by run_remote_test to allow people to
596 use their local devserver. Default: False
597 @param repair: Whether or not we are in repair mode. This adds special
598 cases for repairing a machine like starting update_engine.
599 Setting repair to True sets force_update to True as well.
600 default: False
601 @raises autoupdater.ChromiumOSError
602
603 """
Dan Shi7458bf62013-06-10 12:50:16 -0700604 if update_url:
605 logging.debug('update url is set to %s', update_url)
606 else:
607 logging.debug('update url is not set, resolving...')
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700608 if self._parser.options.image:
609 requested_build = self._parser.options.image
610 if requested_build.startswith('http://'):
611 update_url = requested_build
Dan Shi7458bf62013-06-10 12:50:16 -0700612 logging.debug('update url is retrieved from requested_build'
613 ': %s', update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700614 else:
615 # Try to stage any build that does not start with
616 # http:// on the devservers defined in
617 # global_config.ini.
Dan Shi7458bf62013-06-10 12:50:16 -0700618 update_url = self._stage_image_for_update(requested_build)
619 logging.debug('Build staged, and update_url is set to: %s',
620 update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700621 elif repair:
622 update_url = self._stage_image_for_update()
Dan Shi7458bf62013-06-10 12:50:16 -0700623 logging.debug('Build staged, and update_url is set to: %s',
624 update_url)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400625 else:
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700626 raise autoupdater.ChromiumOSError(
627 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500628
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500629 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800630 # In case the system is in a bad state, we always reboot the machine
631 # before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700632 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500633 self.run('stop update-engine; start update-engine')
634 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800635
Chris Sosaa3ac2152012-05-23 22:23:13 -0700636 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Chris Sosa72312602013-04-16 15:01:56 -0700637 local_devserver=local_devserver)
Dan Shi0f466e82013-02-22 15:44:58 -0800638 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400639 # Remove cros-version and job_repo_url host attribute from host.
640 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800641 # If the DUT is already running the same build, try stateful update
642 # first. Stateful update does not update kernel and tends to run much
643 # faster than a full reimage.
644 try:
Chris Sosab76e0ee2013-05-22 16:55:41 -0700645 updated = self._try_stateful_update(
646 update_url, force_update, updater)
Dan Shi0f466e82013-02-22 15:44:58 -0800647 if updated:
648 logging.info('DUT is updated with stateful update.')
649 except Exception as e:
650 logging.exception(e)
651 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700652
Dan Shi0f466e82013-02-22 15:44:58 -0800653 inactive_kernel = None
654 # Do a full update if stateful update is not applicable or failed.
655 if not updated:
656 # In case the system is in a bad state, we always reboot the
657 # machine before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700658 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Chris Sosab7612bc2013-03-21 10:32:37 -0700659
660 # TODO(sosa): Remove temporary hack to get rid of bricked machines
661 # that can't update due to a corrupted policy.
662 self.run('rm -rf /var/lib/whitelist')
663 self.run('touch /var/lib/whitelist')
664 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400665 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700666
Dan Shi0f466e82013-02-22 15:44:58 -0800667 if updater.run_update(force_update):
668 updated = True
669 # Figure out active and inactive kernel.
670 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700671
Dan Shi0f466e82013-02-22 15:44:58 -0800672 # Ensure inactive kernel has higher priority than active.
673 if (updater.get_kernel_priority(inactive_kernel)
674 < updater.get_kernel_priority(active_kernel)):
675 raise autoupdater.ChromiumOSError(
676 'Update failed. The priority of the inactive kernel'
677 ' partition is less than that of the active kernel'
678 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700679
Dan Shi0f466e82013-02-22 15:44:58 -0800680 # Updater has returned successfully; reboot the host.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700681 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700682
Dan Shi0f466e82013-02-22 15:44:58 -0800683 if updated:
684 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400685 image_name = autoupdater.url_to_image_name(update_url)
Dan Shie9309262013-06-19 22:50:21 -0700686 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800687
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700688 # Clean up any old autotest directories which may be lying around.
689 for path in global_config.global_config.get_config_value(
690 'AUTOSERV', 'client_autodir_paths', type=list):
691 self.run('rm -rf ' + path)
692
693
def show_update_engine_log(self):
    """Dump the update engine log of the DUT.

    Logs a debug line naming the log file, then runs `cat` on
    `constants.UPDATE_ENGINE_LOG` on the host via `self.run()`.
    """
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
698
699
def _get_board_from_afe(self):
    """Look up this host's board via its AFE labels.

    Delegates to `server_utils.get_board_from_afe()`, passing this
    host's name and AFE handle.  Per the original documentation the
    helper searches for a label of the form "board:<board>" and yields
    the "<board>" part, or `None` when there is not exactly one such
    label.

    @returns board from label, or `None`.
    """
    hostname = self.hostname
    afe = self._AFE
    return server_utils.get_board_from_afe(hostname, afe)
Simran Basi833814b2013-01-29 13:13:43 -0800710
711
def get_build(self):
    """Look up the build currently assigned to this host in the AFE.

    Delegates to `server_utils.get_build_from_afe()`, passing this
    host's name and AFE handle.

    @returns The current build, or None if it could not be found or if
             multiple build labels were assigned to this host.
    """
    hostname = self.hostname
    afe = self._AFE
    return server_utils.get_build_from_afe(hostname, afe)
Richard Barnette82c35912012-11-20 10:09:10 -0800721
722
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    When the host answers `is_up()`, re-install it by calling
    `machine_install(repair=True)`, which installs the stable "repair"
    version of Chrome OS defined in the global_config under
    CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    dut_reachable = self.is_up()
    if not dut_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the traceback for debugging, then let the caller see
        # the original failure.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500744
745
def servo_install(self, image_url=None):
    """Re-install the OS on the DUT through the attached Servo.

    The procedure is:
      1) install a test image on the USB storage device attached to
         the Servo board,
      2) boot that image in recovery mode, and then
      3) install the image with chromeos-install and boot into it.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.

    @raises AutoservRepairFailure if the DUT does not come up from USB
            within `USB_BOOT_TIMEOUT` seconds.
    @raises AutoservError if the DUT does not come up from the freshly
            installed image within `BOOT_TIMEOUT` seconds.
    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()
    if self.wait_up(timeout=self.BOOT_TIMEOUT):
        return
    raise error.AutoservError('DUT failed to reboot installed '
                              'test image after %d seconds' %
                              self.BOOT_TIMEOUT)
775
776
def _servo_repair_reinstall(self):
    """Reinstall the DUT utilizing servo and a test image.

    Stages an image with `stage_image_for_servo()` and hands its URL
    to `servo_install()`, which:
      1) installs a test image on a USB storage device attached to the
         Servo board,
      2) boots that image in recovery mode, and then
      3) installs the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_image_url = self.stage_image_for_servo()
    self.servo_install(staged_image_url)
799
800
def _servo_repair_power(self):
    """Attempt to repair DUT using an attached Servo.

    Powers the DUT off and back on through the servo's power state
    controller, then waits up to `BOOT_TIMEOUT` for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    # The controller is fetched anew for each operation.
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()
    came_up = self.wait_up(self.BOOT_TIMEOUT)
    if not came_up:
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800822
823
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power-cycled and then given `BOOT_TIMEOUT` to come up.
    If it does not, the cycle is repeated until the number of failed
    cycles reaches `_MAX_POWER_CYCLE_ATTEMPTS`; per the original
    documentation the repeated power cycles are meant to trigger the
    automatic fallback to the last good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the repair fails for any reason.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
854
855
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted in order, stopping at the
    first one after which `self.verify()` passes:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If there's a servo for the DUT, try to re-install via
         the servo.
      4. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Any exception raised by a procedure, or by the `self.verify()`
    call that follows it, is logged and collected and the next
    procedure is tried.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.  The message lists the error from every
            attempt.
    """
    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = (self._install_repair,
                    self._servo_repair_power,
                    self._servo_repair_reinstall,
                    self._powercycle_to_repair)
    errors = []
    for repair_func in repair_funcs:
        try:
            repair_func()
            # A repair only counts once the host verifies cleanly.
            self.verify()
            return
        except Exception as e:
            # Best effort by design: remember the failure and fall
            # through to the next, more drastic, procedure.
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -0800901
902
def close(self):
    """Close the host: drop all RPC connections, then close the parent.

    `rpc_disconnect_all()` runs first, then the parent class's
    `close()` is invoked.
    """
    self.rpc_disconnect_all()
    parent = super(CrosHost, self)
    parent.close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700906
907
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE record for this host carries the
    `_RPM_OUTLET_CHANGED` attribute.  Otherwise the host is powered on
    and the attribute is reset to None in the AFE.  A
    `RemotePowerException` from powering on is logged rather than
    raised, so cleanup does not fail because of it.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts:
        return
    if self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
926
927
beepsc87ff602013-07-31 21:53:00 -0700928 def _is_factory_image(self):
929 """Checks if the image on the DUT is a factory image.
930
931 @return: True if the image on the DUT is a factory image.
932 False otherwise.
933 """
934 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
935 return result.exit_status == 0
936
937
def _restart_ui(self):
    """Restarts ui.

    Clears the login prompt state through the client-side cros_ui
    module, runs `restart ui` on the DUT, and then waits for the login
    prompt, again through cros_ui.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    client_at.run_static_method(cros_ui_module, '_clear_login_prompt_state')
    self.run('restart ui')
    client_at.run_static_method(cros_ui_module, '_wait_for_login_prompt')
956
957
def cleanup(self):
    """Clean up the DUT after a job.

    Removes `constants.CLEANUP_LOGS_PAUSED_FILE` from the DUT and
    restarts the UI.  If the UI cannot be restarted (including on
    factory images, where restarting is refused), falls back to the
    parent class's cleanup.  Finally, when the host has remote power
    control, makes sure its power is back on.
    """
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()
    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700971
972
def reboot(self, **dargs):
    """Reboot the host with defaults tuned for Chrome OS.

    The more generic RemoteHost.reboot() performs sync and sleeps for
    5 seconds.  This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot command.

    Unless the caller supplies them, `reboot_cmd` defaults to a
    backgrounded `reboot` followed 10 seconds later by `reboot -f`,
    and `fastsync` defaults to True.

    @param dargs Keyword arguments forwarded to the parent class's
                 `reboot()`.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700988
989
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. update_engine answers a simple status request (skipped on
        factory images).
     5. python on the DUT can import cPickle.

    The required free space comes from the SERVER section of the
    global config (`gb_diskspace_required`, default 20.0, and
    `gb_encrypted_diskspace_required`, default 0.1).

    """
    super(CrosHost, self).verify_software()

    config = global_config.global_config
    stateful_gb = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float,
            default=20.0)
    self.check_diskspace('/mnt/stateful_partition', stateful_gb)

    encrypted_gb = config.get_config_value(
            'SERVER', 'gb_encrypted_diskspace_required', type=float,
            default=0.1)
    self.check_diskspace('/mnt/stateful_partition/encrypted', encrypted_gb)

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    running_factory_image = self._is_factory_image()
    if not running_factory_image:
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001021
1022
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line as a string, without the target host.
    """
    # Variable leading part: verbosity flag, per-host options and
    # per-call options, in that order.
    ssh_prefix = '/usr/bin/ssh -a -x %s %s %s' % (
            self._ssh_verbosity_flag, self._ssh_options, opts)
    fixed_options = (
            '-o StrictHostKeyChecking=no',
            '-o UserKnownHostsFile=/dev/null',
            '-o BatchMode=yes',
            '-o ConnectTimeout=30',
            '-o ServerAliveInterval=180',
            '-o ServerAliveCountMax=3',
            '-o ConnectionAttempts=4',
            '-o Protocol=2',
    )
    login = '-l %s -p %d' % (user, port)
    return ' '.join((ssh_prefix,) + fixed_options + (login,))
Fang Deng96667ca2013-08-01 17:46:18 -07001063
1064
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    Builds an ssh command with `make_ssh_command()` that forwards
    `local_port` to `localhost:port` on the host, and starts it
    through the shell as a child process.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)
    return tunnel_proc
1087
1088
def _setup_rpc(self, port, command_name):
    """Sets up a tunnel process and performs rpc connection book keeping.

    Any connection previously registered for `port` is disconnected
    first, so there is at most one tunnel per remote port.  A new
    tunnel is then opened from an unused local port and recorded in
    `_rpc_proxy_map` under `port`, together with `command_name`.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port.  If one starts a jsonrpc proxy/server from port
    A->B and then starts an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy overrides the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image.  It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    Because newer connections terminate the tunnel of older ones to
    the same remote port, methods invoked on an older client
    afterwards will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.

    @return A url that we can use to initiate the rpc connection.
    """
    self.rpc_disconnect(port)
    local_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, local_port)
    self._rpc_proxy_map[port] = (command_name, tunnel)
    rpc_url = self._RPC_PROXY_URL % local_port
    return rpc_url
1124
1125
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect blocks, retrying the
    call, until it succeeds or timeout_seconds have passed.  On
    failure the connection is torn down with rpc_disconnect and
    the error raised by the retry logic propagates.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'

    @return The XMLRPC client proxy.
    """
    rpc_url = self._setup_rpc(port, command_name)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except Exception:
        # Don't leave the freshly created tunnel behind.
        self.rpc_disconnect(port)
        raise

    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        retried_errors = (socket.error,
                          xmlrpclib.ProtocolError,
                          httplib.BadStatusLine)
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        poll_delay = min(max(timeout_seconds/20.0, 0.1), 1)

        @retry.retry(retried_errors,
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=poll_delay)
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            # Runs for every kind of failure, including ones the
            # retry decorator does not catch.
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001206
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001207
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller.  Unlike
    xmlrpc_connect, this method does not start a remote jsonrpc server,
    as the only clients of this code are factory tests, for which the
    goofy system manager is built in to the image and starts when the
    target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening.  However, in doing so we stand the risk of leaking an
    existing tunnel process, so any older tunnel to the port is always
    disconnected first (this happens in _setup_rpc).

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name: there is no remote server for us to kill later.
    rpc_url = self._setup_rpc(port, None)
    proxy = jsonrpclib.jsonrpc.ServerProxy(rpc_url)

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1238
1239
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port` (when a process name was recorded for it), and
    closes the local ssh tunnel created for the connection to the
    host.  This function does not directly alter the state of a
    previously returned RPC client object; however disconnection
    will cause all subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    """
    try:
        remote_name, tunnel_proc = self._rpc_proxy_map[port]
    except KeyError:
        return

    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    still_running = tunnel_proc.poll() is None
    if still_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001280
1281
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports.

    Calls `rpc_disconnect()` once for every port currently recorded
    in `self._rpc_proxy_map`.
    """
    # rpc_disconnect() deletes the port's entry from the map, so walk
    # an explicit snapshot of the keys.  Iterating the live mapping
    # (or a key view of it) while entries are being removed is an
    # error.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001286
1287
def _ping_check_status(self, status):
    """Ping the host a single time and compare against `status`.

    The host counts as "up" when `utils.ping()` returns 0.

    @param status The ping state to compare against.
    @return True iff `status` and the outcome of the ping agree
            (i.e. both True or both False).

    """
    ping_result = utils.ping(self.hostname, tries=1, deadline=1)
    host_is_up = (ping_result == 0)
    # XOR is true exactly when the two values disagree.
    disagree = status ^ host_is_up
    return not disagree
1298
1299 def _ping_wait_for_status(self, status, timeout):
1300 """Wait for the host to have a given status (UP or DOWN).
1301
1302 Status is checked by polling. Polling will not last longer
1303 than the number of seconds in `timeout`. The polling
1304 interval will be long enough that only approximately
1305 _PING_WAIT_COUNT polling cycles will be executed, subject
1306 to a maximum interval of about one minute.
1307
1308 @param status Waiting will stop immediately if `ping` of the
1309 host returns this status.
1310 @param timeout Poll for at most this many seconds.
1311 @return True iff the host status from `ping` matched the
1312 requested status at the time of return.
1313
1314 """
1315 # _ping_check_status() takes about 1 second, hence the
1316 # "- 1" in the formula below.
1317 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1318 end_time = time.time() + timeout
1319 while time.time() <= end_time:
1320 if self._ping_check_status(status):
1321 return True
1322 if poll_interval > 0:
1323 time.sleep(poll_interval)
1324
1325 # The last thing we did was sleep(poll_interval), so it may
1326 # have been too long since the last `ping`. Check one more
1327 # time, just to be sure.
1328 return self._ping_check_status(status)
1329
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable replacement for `wait_up()`: a
    host that answers ping does not necessarily answer ssh.  Use
    this only when the target DUT can be considered functional
    even though it cannot be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001345
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster alternative to
    `wait_down()`, since it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host stopped answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001359
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, so
    this check cannot tell a sleeping unit from one that is powered
    off.  To confirm the unit really slept, force a resume and then
    call `test_wait_for_resume()`.

    A test is expected to use this function in a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time (`SLEEP_TIMEOUT`).
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001386
1387
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value returned by `get_boot_id()`
    before the host entered sleep mode.  A changed boot id after
    the host comes back is reported as a `TestFail`.

    See @ref test_wait_for_sleep for more on how this function is
    meant to be used.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time (`RESUME_TIMEOUT`).
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    came_back = self.wait_up(timeout=self.RESUME_TIMEOUT)
    if not came_back:
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        # Same boot session: the host slept and resumed as expected.
        return
    raise error.TestFail(
            'client rebooted, but sleep was expected'
            ' (old boot %s, new boot %s)'
            % (old_boot_id, new_boot_id))
1418
1419
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    "Shut down" is detected by the host no longer answering ping,
    so this check cannot tell a powered-off unit from one that is
    merely asleep.  To confirm the unit was really down, force a
    boot and then call test_wait_for_boot().

    A test is expected to use this function in a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time (`SHUTDOWN_TIMEOUT`).
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001445
1446
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value returned by `get_boot_id()`
    before the host was shut down.  If the boot id is unchanged
    once the host is back, a `TestFail` is raised.  The boot id
    comparison is skipped when `old_boot_id` is not supplied (or
    is otherwise false).

    See @ref test_wait_for_shutdown for more on how this function
    is meant to be used.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time (`REBOOT_TIMEOUT`).
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_back = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_back:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    if not old_boot_id:
        # Nothing to compare against; being reachable is enough.
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001476
1477
@staticmethod
def check_for_rpm_support(hostname):
    """Tell whether a hostname follows the RPM-powered naming format.

    The hostname is matched (anchored at its start) against
    `CrosHost._RPM_HOSTNAME_REGEX`.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not follow the defined naming
            format for RPM powered DUTs in the lab.  If it does follow
            the format, the regular expression MatchObject is returned
            instead.
    """
    rpm_hostname_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001489
1490
def has_power(self):
    """Tell whether this host is powered by an RPM.

    Delegates to `CrosHost.check_for_rpm_support()` using this
    host's hostname.

    @return The result of `check_for_rpm_support()`: a value that is
            true when the hostname follows the naming format defined
            for RPM powered hosts in the CROS lab (a regular expression
            MatchObject), and None otherwise.  Intended to be used as a
            truth value.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07001498
1499
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001500 def _set_power(self, state, power_method):
1501 """Sets the power to the host via RPM, Servo or manual.
1502
1503 @param state Specifies which power state to set to DUT
1504 @param power_method Specifies which method of power control to
1505 use. By default "RPM" will be used. Valid values
1506 are the strings "RPM", "manual", "servoj10".
1507
1508 """
1509 ACCEPTABLE_STATES = ['ON', 'OFF']
1510
1511 if state.upper() not in ACCEPTABLE_STATES:
1512 raise error.TestError('State must be one of: %s.'
1513 % (ACCEPTABLE_STATES,))
1514
1515 if power_method == self.POWER_CONTROL_SERVO:
1516 logging.info('Setting servo port J10 to %s', state)
1517 self.servo.set('prtctl3_pwren', state.lower())
1518 time.sleep(self._USB_POWER_TIMEOUT)
1519 elif power_method == self.POWER_CONTROL_MANUAL:
1520 logging.info('You have %d seconds to set the AC power to %s.',
1521 self._POWER_CYCLE_TIMEOUT, state)
1522 time.sleep(self._POWER_CYCLE_TIMEOUT)
1523 else:
1524 if not self.has_power():
1525 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001526 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1527 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1528 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07001529 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07001530
1531
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Cut power to this host via RPM, Servo or manually.

    Thin wrapper that requests the 'OFF' state from `_set_power()`.

    @param power_method Which method of power control to use.
                 Defaults to `POWER_CONTROL_RPM`.  Valid values
                 are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001541
1542
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Restore power to this host via RPM, Servo or manually.

    Thin wrapper that requests the 'ON' state from `_set_power()`.

    @param power_method Which method of power control to use.
                 Defaults to `POWER_CONTROL_RPM`.  Valid values
                 are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1552
1553
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For Servo and manual control the cycle is performed here:
    `power_off()`, a pause of `_POWER_CYCLE_TIMEOUT` seconds, then
    `power_on()`.  For any other method a single 'CYCLE' request is
    sent to the RPM client for this host.

    @param power_method Which method of power control to use.
                 Defaults to `POWER_CONTROL_RPM`.  Valid values
                 are the strings "RPM", "manual", "servoj10".

    """
    locally_cycled_methods = (self.POWER_CONTROL_SERVO,
                              self.POWER_CONTROL_MANUAL)
    if power_method not in locally_cycled_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001569
1570
def get_platform(self):
    """Work out the platform label for this host from its fwid.

    @returns a string representing this host's platform: the part
             of the firmware id before the first '.', lower-cased,
             with any 'google_' removed.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # The fwid generally has the form {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z, so the leading
    # dot-separated field identifies the platform.
    firmware_id = crossystem.fwid()
    platform_field = firmware_id.split('.')[0]
    lowered = platform_field.lower()
    # Newer platforms carry a 'Google_' prefix that older ones lack.
    return lowered.replace('google_', '')
1584
1585
@label_decorator()
def get_board(self):
    """Work out the board label for this host.

    Reads CHROMEOS_RELEASE_BOARD from /etc/lsb-release on the host
    and prefixes the trimmed board name with
    `ds_constants.BOARD_PREFIX`.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, whereas
    # development devices report {board_name}-signed-{key_type}.
    # A board name may also start with 'x86-', which has to be kept,
    # so such names retain their first two dash-separated fields.
    fields = board.split('-')
    if 'x86' in board:
        kept_fields = fields[0:2]
    else:
        kept_fields = fields[0:1]
    return board_format_string % '-'.join(kept_fields)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001602
1603
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a light sensor.

    Searches `_LIGHTSENSOR_SEARCH_DIR` on the host (following
    symlinks, at most 4 levels deep) for any path matching one of
    the names in `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop only produces a few messages on stderr while
        # the command itself still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    else:
        return 'lightsensor'
1623
1624
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # 'test' exited with return code 1: the directory does not
        # exist.
        return None
    else:
        # 'test' exited with return code 0: the directory exists.
        return 'bluetooth'
1640
1641
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the graphics label for this host.

    The decision is based only on the output of `uname -a`.  For
    robustness, no code from the actual graphics libraries is
    executed here: they may not be ready, and they are tested by
    graphics_GLAPICheck.

    @returns a string representing this host's graphics.  For now
             hosts whose `uname -a` output mentions 'arm' return
             graphics:gles while all other hosts return graphics:gl.
             This may change over time.
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
1657
1658
def get_labels(self):
    """Collect the automatic labels that apply to this host.

    Every function in `_LABEL_FUNCTIONS` is called with this host,
    in order; results that are false (for example None) are
    dropped.

    @returns a list of the labels produced for this host.
    """
    produced = (label_function(self)
                for label_function in self._LABEL_FUNCTIONS)
    return [label for label in produced if label]