blob: 3c09b1f86cce31861f6f883d95766af9a52ceee5 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080028from autotest_lib.server.cros.servo import servo
Fang Deng96667ca2013-08-01 17:46:18 -070029from autotest_lib.server.hosts import abstract_ssh
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
beeps32a63082013-08-22 14:02:29 -070034try:
35 import jsonrpclib
36except ImportError:
37 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070038
Fang Dengd1c2b732013-08-20 12:59:46 -070039
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080040def _make_servo_hostname(hostname):
41 host_parts = hostname.split('.')
42 host_parts[0] = host_parts[0] + '-servo'
43 return '.'.join(host_parts)
44
45
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    Failure to construct the Servo is treated as "no servo": the
    error is logged at debug level and `None` is returned.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None` if
            the servo host is not in the lab zone or the Servo could
            not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Catch Exception rather than everything, so that
        # KeyboardInterrupt and SystemExit still propagate.
        logging.debug('Unable to set up lab servo %s; continuing '
                      'without a servo.', servo_host, exc_info=True)
        return None
68
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080069
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a label-detecting function.

    The returned decorator appends the decorated function to
    `label_function_list` and hands the function back unchanged.  When
    both a non-empty `label` and a `label_list` are supplied, `label` is
    also appended to `label_list`.

    @param label_function_list: List of label detecting functions to add
                                decorated function to.
    @param label_list: List of detectable labels to add detectable labels
                       to.  (Default: None)
    @param label: Label string that is detectable by this detection
                  function.  (Default: None)
    @returns the registering decorator.
    """
    def register(detector):
        """
        @param detector: The function to be added as a detector.
        @returns `detector`, unmodified.
        """
        label_function_list.append(detector)
        if label_list is None or not label:
            return detector
        label_list.append(label)
        return detector
    return register
88
89
Fang Deng96667ca2013-08-01 17:46:18 -070090class SiteHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070091 """Chromium OS specific subclass of Host."""
92
93 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050094 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070095
Richard Barnette0c73ffc2012-11-19 15:21:18 -080096 # Time to wait for new kernel to be marked successful after
97 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -070098 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -070099
Richard Barnette03a0c132012-11-05 12:40:35 -0800100 # Timeout values (in seconds) associated with various Chrome OS
101 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700102 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800103 # In general, a good rule of thumb is that the timeout can be up
104 # to twice the typical measured value on the slowest platform.
105 # The times here have not necessarily been empirically tested to
106 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700107 #
108 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800109 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
110 # time to restart the netwowrk.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700111 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800112 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800113 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700114 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800115 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800116 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800117 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700118 # REBOOT_TIMEOUT: How long to wait for a reboot.
Richard Barnette03a0c132012-11-05 12:40:35 -0800119 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700120
121 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800122 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700123 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700124 USB_BOOT_TIMEOUT = 150
Chris Sosab76e0ee2013-05-22 16:55:41 -0700125
126 # We have a long timeout to ensure we don't flakily fail due to other
127 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
128 REBOOT_TIMEOUT = 300
129
Richard Barnette03a0c132012-11-05 12:40:35 -0800130 _INSTALL_TIMEOUT = 240
131
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800132 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
133 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
134 _USB_POWER_TIMEOUT = 5
135 _POWER_CYCLE_TIMEOUT = 10
136
beeps32a63082013-08-22 14:02:29 -0700137 _RPC_PROXY_URL = 'http://localhost:%d'
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800138
Richard Barnette82c35912012-11-20 10:09:10 -0800139 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
140 'rpm_recovery_boards', type=str).split(',')
141
142 _MAX_POWER_CYCLE_ATTEMPTS = 6
143 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
144 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
145 'host[0-9]+')
146 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
147 'in_illuminance0_raw',
148 'illuminance0_input']
149 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
150 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800151 _DETECTABLE_LABELS = []
152 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
153 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700154
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800155 # Constants used in ping_wait_up() and ping_wait_down().
156 #
157 # _PING_WAIT_COUNT is the approximate number of polling
158 # cycles to use when waiting for a host state change.
159 #
160 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
161 # for arguments to the internal _ping_wait_for_status()
162 # method.
163 _PING_WAIT_COUNT = 40
164 _PING_STATUS_DOWN = False
165 _PING_STATUS_UP = True
166
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800167 # Allowed values for the power_method argument.
168
169 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
170 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
171 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
172 POWER_CONTROL_RPM = 'RPM'
173 POWER_CONTROL_SERVO = 'servoj10'
174 POWER_CONTROL_MANUAL = 'manual'
175
176 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
177 POWER_CONTROL_SERVO,
178 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800179
Simran Basi5e6339a2013-03-21 11:34:32 -0700180 _RPM_OUTLET_CHANGED = 'outlet_changed'
181
beeps687243d2013-07-18 15:29:27 -0700182
J. Richard Barnette964fba02012-10-24 17:34:29 -0700183 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800184 def get_servo_arguments(args_dict):
185 """Extract servo options from `args_dict` and return the result.
186
187 Take the provided dictionary of argument options and return
188 a subset that represent standard arguments needed to
189 construct a servo object for a host. The intent is to
190 provide standard argument processing from run_remote_tests
191 for tests that require a servo to operate.
192
193 Recommended usage:
194 ~~~~~~~~
195 args_dict = utils.args_to_dict(args)
196 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
197 host = hosts.create_host(machine, servo_args=servo_args)
198 ~~~~~~~~
199
200 @param args_dict Dictionary from which to extract the servo
201 arguments.
202 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700203 servo_args = {}
204 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800205 if arg in args_dict:
206 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700207 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700208
J. Richard Barnette964fba02012-10-24 17:34:29 -0700209
Fang Dengd1c2b732013-08-20 12:59:46 -0700210 def _initialize(self, hostname, servo_args=None, ssh_verbosity_flag='',
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700211 ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700212 *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700213 """Initialize superclasses, and |self.servo|.
214
215 For creating the host servo object, there are three
216 possibilities: First, if the host is a lab system known to
217 have a servo board, we connect to that servo unconditionally.
218 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700219 servo features for testing, it will pass settings for
220 `servo_host`, `servo_port`, or both. If neither of these
221 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700222
223 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700224 super(SiteHost, self)._initialize(hostname=hostname,
225 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700226 # self.env is a dictionary of environment variable settings
227 # to be exported for commands run on the host.
228 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
229 # errors that might happen.
230 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700231 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700232 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700233 self._ssh_options = ssh_options
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800234 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700235 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700236 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700237
238
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500239 def get_repair_image_name(self):
240 """Generate a image_name from variables in the global config.
241
242 @returns a str of $board-version/$BUILD.
243
244 """
245 stable_version = global_config.global_config.get_config_value(
246 'CROS', 'stable_cros_version')
247 build_pattern = global_config.global_config.get_config_value(
248 'CROS', 'stable_build_pattern')
249 board = self._get_board_from_afe()
250 if board is None:
251 raise error.AutoservError('DUT has no board attribute, '
252 'cannot be repaired.')
253 return build_pattern % (board, stable_version)
254
255
Scott Zawalski62bacae2013-03-05 10:40:32 -0500256 def _host_in_AFE(self):
257 """Check if the host is an object the AFE knows.
258
259 @returns the host object.
260 """
261 return self._AFE.get_hosts(hostname=self.hostname)
262
263
Chris Sosab76e0ee2013-05-22 16:55:41 -0700264 def lookup_job_repo_url(self):
265 """Looks up the job_repo_url for the host.
266
267 @returns job_repo_url from AFE or None if not found.
268
269 @raises KeyError if the host does not have a job_repo_url
270 """
271 if not self._host_in_AFE():
272 return None
273
274 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700275 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
276 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700277
278
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500279 def clear_cros_version_labels_and_job_repo_url(self):
280 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500281 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400282 return
283
Scott Zawalski62bacae2013-03-05 10:40:32 -0500284 host_list = [self.hostname]
285 labels = self._AFE.get_labels(
286 name__startswith=ds_constants.VERSION_PREFIX,
287 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800288
Scott Zawalski62bacae2013-03-05 10:40:32 -0500289 for label in labels:
290 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500291
beepscb6f1e22013-06-28 19:14:10 -0700292 self.update_job_repo_url(None, None)
293
294
295 def update_job_repo_url(self, devserver_url, image_name):
296 """
297 Updates the job_repo_url host attribute and asserts it's value.
298
299 @param devserver_url: The devserver to use in the job_repo_url.
300 @param image_name: The name of the image to use in the job_repo_url.
301
302 @raises AutoservError: If we failed to update the job_repo_url.
303 """
304 repo_url = None
305 if devserver_url and image_name:
306 repo_url = tools.get_package_url(devserver_url, image_name)
307 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500308 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700309 if self.lookup_job_repo_url() != repo_url:
310 raise error.AutoservError('Failed to update job_repo_url with %s, '
311 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500312
313
Dan Shie9309262013-06-19 22:50:21 -0700314 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400315 """Add cros_version labels and host attribute job_repo_url.
316
317 @param image_name: The name of the image e.g.
318 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700319
Scott Zawalskieadbf702013-03-14 09:23:06 -0400320 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500321 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400322 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500323
Scott Zawalskieadbf702013-03-14 09:23:06 -0400324 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700325 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500326
327 labels = self._AFE.get_labels(name=cros_label)
328 if labels:
329 label = labels[0]
330 else:
331 label = self._AFE.create_label(name=cros_label)
332
333 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700334 self.update_job_repo_url(devserver_url, image_name)
335
336
beepsdae65fd2013-07-26 16:24:41 -0700337 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700338 """
339 Make sure job_repo_url of this host is valid.
340
joychen03eaad92013-06-26 09:55:21 -0700341 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700342 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
343 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
344 download and extract it. If the devserver embedded in the url is
345 unresponsive, update the job_repo_url of the host after staging it on
346 another devserver.
347
348 @param job_repo_url: A url pointing to the devserver where the autotest
349 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700350 @param tag: The tag from the server job, in the format
351 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700352
353 @raises DevServerException: If we could not resolve a devserver.
354 @raises AutoservError: If we're unable to save the new job_repo_url as
355 a result of choosing a new devserver because the old one failed to
356 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700357 @raises urllib2.URLError: If the devserver embedded in job_repo_url
358 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700359 """
360 job_repo_url = self.lookup_job_repo_url()
361 if not job_repo_url:
362 logging.warning('No job repo url set on host %s', self.hostname)
363 return
364
365 logging.info('Verifying job repo url %s', job_repo_url)
366 devserver_url, image_name = tools.get_devserver_build_from_package_url(
367 job_repo_url)
368
beeps0c865032013-07-30 11:37:06 -0700369 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700370
371 logging.info('Staging autotest artifacts for %s on devserver %s',
372 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700373
374 start_time = time.time()
beepscb6f1e22013-06-28 19:14:10 -0700375 ds.stage_artifacts(image_name, ['autotest'])
beeps687243d2013-07-18 15:29:27 -0700376 stage_time = time.time() - start_time
377
378 # Record how much of the verification time comes from a devserver
379 # restage. If we're doing things right we should not see multiple
380 # devservers for a given board/build/branch path.
381 try:
382 board, build_type, branch = site_utils.ParseBuildName(
383 image_name)[:3]
384 except site_utils.ParseBuildNameException as e:
385 pass
386 else:
beeps0c865032013-07-30 11:37:06 -0700387 devserver = devserver_url[
388 devserver_url.find('/')+2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700389 stats_key = {
390 'board': board,
391 'build_type': build_type,
392 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700393 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700394 }
395 stats.Gauge('verify_job_repo_url').send(
396 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
397 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700398
Scott Zawalskieadbf702013-03-14 09:23:06 -0400399
    def _try_stateful_update(self, update_url, force_update, updater):
        """Try to use stateful update to initialize DUT.

        When DUT is already running the same version that machine_install
        tries to install, stateful update is a much faster way to clean up
        the DUT for testing, compared to a full reimage. It is implemented
        by calling autoupdater.run_update, but skipping updating root, as
        updating the kernel is time consuming and not necessary.

        The stateful update is only attempted when `updater.check_version()`
        is true and `force_update` is set; in every other case the method
        returns False without touching the DUT.

        @param update_url: url of the image. Not referenced in the body of
                           this method.
        @param force_update: Set to True to update the image even if the DUT
                             is running the same version.
        @param updater: ChromiumOSUpdater instance used to update the DUT.
        @returns: True if the DUT was updated with stateful update.

        """
        # Presumably check_version() reports that the DUT already runs the
        # target version; anything else needs a full update instead.
        if not updater.check_version():
            return False
        if not force_update:
            logging.info('Canceling stateful update because the new and '
                         'old versions are the same.')
            return False
        # Following folders should be rebuilt after stateful update.
        # A test file is used to confirm each folder gets rebuilt after
        # the stateful update.
        folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
        test_file = '.test_file_to_be_deleted'
        for folder in folders_to_check:
            touch_path = os.path.join(folder, test_file)
            self.run('touch %s' % touch_path)

        # update_root=False: only the stateful partition is updated.
        if not updater.run_update(force_update=True, update_root=False):
            return False

        # Reboot to complete stateful update.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        # NOTE(review): the command ends with `echo $?`, so under a normal
        # shell its exit status is echo's, not that of `test -f`; the
        # comparison with 1 below then looks like it can never be true.
        # Also, `test -f` exits 1 when the file is absent, which is the
        # expected state after a rebuild, so the sense of the check looks
        # inverted relative to the comment above. Confirm the intended
        # behavior before changing it.
        check_file_cmd = 'test -f %s; echo $?'
        for folder in folders_to_check:
            test_file_path = os.path.join(folder, test_file)
            result = self.run(check_file_cmd % test_file_path,
                              ignore_status=True)
            if result.exit_status == 1:
                return False
        return True
444
445
J. Richard Barnette7275b612013-06-04 18:13:11 -0700446 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800447 """After the DUT is updated, confirm machine_install succeeded.
448
449 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700450 @param expected_kernel: kernel expected to be active after reboot,
451 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800452
453 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700454 # Touch the lab machine file to leave a marker that
455 # distinguishes this image from other test images.
456 # Afterwards, we must re-run the autoreboot script because
457 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800458 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800459 self.run('start autoreboot')
460
J. Richard Barnette7275b612013-06-04 18:13:11 -0700461 # Figure out the newly active kernel.
462 active_kernel, _ = updater.get_kernel_state()
463
464 # Check for rollback due to a bad build.
465 if expected_kernel and active_kernel != expected_kernel:
466 # Print out some information to make it easier to debug
467 # the rollback.
Dan Shi0f466e82013-02-22 15:44:58 -0800468 logging.debug('Dumping partition table.')
Dan Shi346725f2013-03-20 15:22:38 -0700469 self.run('cgpt show $(rootdev -s -d)')
Dan Shi0f466e82013-02-22 15:44:58 -0800470 logging.debug('Dumping crossystem for firmware debugging.')
Dan Shi346725f2013-03-20 15:22:38 -0700471 self.run('crossystem --all')
Dan Shi0f466e82013-02-22 15:44:58 -0800472 raise autoupdater.ChromiumOSError(
J. Richard Barnette7275b612013-06-04 18:13:11 -0700473 'Build %s failed to boot on %s; system rolled back '
474 'to previous build' % (updater.update_version,
475 self.hostname))
Dan Shi0f466e82013-02-22 15:44:58 -0800476
J. Richard Barnette7275b612013-06-04 18:13:11 -0700477 # Check that we've got the build we meant to install.
478 if not updater.check_version_to_confirm_install():
479 raise autoupdater.ChromiumOSError(
480 'Failed to update %s to build %s; found build '
481 '%s instead' % (self.hostname,
482 updater.update_version,
483 updater.get_build_id()))
Scott Zawalski62bacae2013-03-05 10:40:32 -0500484
J. Richard Barnette7275b612013-06-04 18:13:11 -0700485 # Make sure chromeos-setgoodkernel runs.
486 try:
Dan Shi0f466e82013-02-22 15:44:58 -0800487 utils.poll_for_condition(
J. Richard Barnette7275b612013-06-04 18:13:11 -0700488 lambda: (updater.get_kernel_tries(active_kernel) == 0
489 and updater.get_kernel_success(active_kernel)),
490 exception=autoupdater.ChromiumOSError(),
Dan Shi0f466e82013-02-22 15:44:58 -0800491 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
J. Richard Barnette7275b612013-06-04 18:13:11 -0700492 except autoupdater.ChromiumOSError as e:
493 services_status = self.run('status system-services').stdout
494 if services_status != 'system-services start/running\n':
495 event = ('Chrome failed to reach login screen')
496 else:
497 event = ('update-engine failed to call '
498 'chromeos-setgoodkernel')
499 raise autoupdater.ChromiumOSError(
500 'After update and reboot, %s '
501 'within %d seconds' % (event,
502 self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800503
504
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700505 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400506 """Stage a build on a devserver and return the update_url.
507
508 @param image_name: a name like lumpy-release/R27-3837.0.0
509 @returns an update URL like:
510 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
511 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700512 if not image_name:
513 image_name = self.get_repair_image_name()
514 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400515 devserver = dev_server.ImageServer.resolve(image_name)
516 devserver.trigger_download(image_name, synchronous=False)
517 return tools.image_url_pattern() % (devserver.url(), image_name)
518
519
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700520 def stage_image_for_servo(self, image_name=None):
521 """Stage a build on a devserver and return the update_url.
522
523 @param image_name: a name like lumpy-release/R27-3837.0.0
524 @returns an update URL like:
525 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
526 """
527 if not image_name:
528 image_name = self.get_repair_image_name()
529 logging.info('Staging build for servo install: %s', image_name)
530 devserver = dev_server.ImageServer.resolve(image_name)
531 devserver.stage_artifacts(image_name, ['test_image'])
532 return devserver.get_test_image_url(image_name)
533
534
beepse539be02013-07-31 21:57:39 -0700535 def stage_factory_image_for_servo(self, image_name):
536 """Stage a build on a devserver and return the update_url.
537
538 @param image_name: a name like <baord>/4262.204.0
539 @return: An update URL, eg:
540 http://<devserver>/static/canary-channel/\
541 <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
542 """
543 if not image_name:
544 logging.error('Need an image_name to stage a factory image.')
545 return
546
547 logging.info('Staging build for servo install: %s', image_name)
548 devserver = dev_server.ImageServer.resolve(image_name)
549 devserver.stage_artifacts(
550 image_name,
551 ['factory_image'],
552 archive_url=dev_server._get_canary_channel_server())
553
554 return tools.factory_image_url_pattern() % (devserver.url(), image_name)
555
556
Chris Sosaa3ac2152012-05-23 22:23:13 -0700557 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500558 local_devserver=False, repair=False):
559 """Install the DUT.
560
Dan Shi0f466e82013-02-22 15:44:58 -0800561 Use stateful update if the DUT is already running the same build.
562 Stateful update does not update kernel and tends to run much faster
563 than a full reimage. If the DUT is running a different build, or it
564 failed to do a stateful update, full update, including kernel update,
565 will be applied to the DUT.
566
Scott Zawalskieadbf702013-03-14 09:23:06 -0400567 Once a host enters machine_install its cros_version label will be
568 removed as well as its host attribute job_repo_url (used for
569 package install).
570
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500571 @param update_url: The url to use for the update
572 pattern: http://$devserver:###/update/$build
573 If update_url is None and repair is True we will install the
574 stable image listed in global_config under
575 CROS.stable_cros_version.
576 @param force_update: Force an update even if the version installed
577 is the same. Default:False
578 @param local_devserver: Used by run_remote_test to allow people to
579 use their local devserver. Default: False
580 @param repair: Whether or not we are in repair mode. This adds special
581 cases for repairing a machine like starting update_engine.
582 Setting repair to True sets force_update to True as well.
583 default: False
584 @raises autoupdater.ChromiumOSError
585
586 """
Dan Shi7458bf62013-06-10 12:50:16 -0700587 if update_url:
588 logging.debug('update url is set to %s', update_url)
589 else:
590 logging.debug('update url is not set, resolving...')
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700591 if self._parser.options.image:
592 requested_build = self._parser.options.image
593 if requested_build.startswith('http://'):
594 update_url = requested_build
Dan Shi7458bf62013-06-10 12:50:16 -0700595 logging.debug('update url is retrieved from requested_build'
596 ': %s', update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700597 else:
598 # Try to stage any build that does not start with
599 # http:// on the devservers defined in
600 # global_config.ini.
Dan Shi7458bf62013-06-10 12:50:16 -0700601 update_url = self._stage_image_for_update(requested_build)
602 logging.debug('Build staged, and update_url is set to: %s',
603 update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700604 elif repair:
605 update_url = self._stage_image_for_update()
Dan Shi7458bf62013-06-10 12:50:16 -0700606 logging.debug('Build staged, and update_url is set to: %s',
607 update_url)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400608 else:
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700609 raise autoupdater.ChromiumOSError(
610 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500611
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500612 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800613 # In case the system is in a bad state, we always reboot the machine
614 # before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700615 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500616 self.run('stop update-engine; start update-engine')
617 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800618
Chris Sosaa3ac2152012-05-23 22:23:13 -0700619 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Chris Sosa72312602013-04-16 15:01:56 -0700620 local_devserver=local_devserver)
Dan Shi0f466e82013-02-22 15:44:58 -0800621 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400622 # Remove cros-version and job_repo_url host attribute from host.
623 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800624 # If the DUT is already running the same build, try stateful update
625 # first. Stateful update does not update kernel and tends to run much
626 # faster than a full reimage.
627 try:
Chris Sosab76e0ee2013-05-22 16:55:41 -0700628 updated = self._try_stateful_update(
629 update_url, force_update, updater)
Dan Shi0f466e82013-02-22 15:44:58 -0800630 if updated:
631 logging.info('DUT is updated with stateful update.')
632 except Exception as e:
633 logging.exception(e)
634 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700635
Dan Shi0f466e82013-02-22 15:44:58 -0800636 inactive_kernel = None
637 # Do a full update if stateful update is not applicable or failed.
638 if not updated:
639 # In case the system is in a bad state, we always reboot the
640 # machine before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700641 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Chris Sosab7612bc2013-03-21 10:32:37 -0700642
643 # TODO(sosa): Remove temporary hack to get rid of bricked machines
644 # that can't update due to a corrupted policy.
645 self.run('rm -rf /var/lib/whitelist')
646 self.run('touch /var/lib/whitelist')
647 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400648 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700649
Dan Shi0f466e82013-02-22 15:44:58 -0800650 if updater.run_update(force_update):
651 updated = True
652 # Figure out active and inactive kernel.
653 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700654
Dan Shi0f466e82013-02-22 15:44:58 -0800655 # Ensure inactive kernel has higher priority than active.
656 if (updater.get_kernel_priority(inactive_kernel)
657 < updater.get_kernel_priority(active_kernel)):
658 raise autoupdater.ChromiumOSError(
659 'Update failed. The priority of the inactive kernel'
660 ' partition is less than that of the active kernel'
661 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700662
Dan Shi0f466e82013-02-22 15:44:58 -0800663 # Updater has returned successfully; reboot the host.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700664 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700665
Dan Shi0f466e82013-02-22 15:44:58 -0800666 if updated:
667 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400668 image_name = autoupdater.url_to_image_name(update_url)
Dan Shie9309262013-06-19 22:50:21 -0700669 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800670
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700671 # Clean up any old autotest directories which may be lying around.
672 for path in global_config.global_config.get_config_value(
673 'AUTOSERV', 'client_autodir_paths', type=list):
674 self.run('rm -rf ' + path)
675
676
def show_update_engine_log(self):
    """Dump the update engine log of the host.

    Logs which file is about to be dumped, then runs `cat` on
    `constants.UPDATE_ENGINE_LOG` on the host via `self.run()`.
    """
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
681
682
def _get_board_from_afe(self):
    """Look up this host's board via its AFE labels.

    Delegates to `server_utils.get_board_from_afe()`, passing this
    host's name and AFE handle.  Per the original notes, the helper
    looks for a label of the form "board:<board>" and yields the
    "<board>" part, or `None` when there is not exactly one such
    label.

    @returns board from label, or `None`.
    """
    hostname, afe = self.hostname, self._AFE
    return server_utils.get_board_from_afe(hostname, afe)
Simran Basi833814b2013-01-29 13:13:43 -0800693
694
def get_build(self):
    """Look up the build currently assigned to this host in the AFE.

    Delegates to `server_utils.get_build_from_afe()`, passing this
    host's name and AFE handle.

    @returns The current build, or None if it could not be found or if
             multiple build labels are assigned to this host (as
             described by the original notes for the helper).
    """
    hostname, afe = self.hostname, self._AFE
    return server_utils.get_build_from_afe(hostname, afe)
Richard Barnette82c35912012-11-20 10:09:10 -0800704
705
def _install_repair(self):
    """Attempt to repair this host by reinstalling via update-engine.

    If the host is up, `machine_install(repair=True)` is invoked to
    install the repair image.  Per the original notes, that image is
    the stable version defined in the global_config under
    CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason; the
            error is logged and then re-raised unchanged.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the traceback here, but let the caller decide how to
        # proceed with the failure.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500727
728
def servo_install(self, image_url=None):
    """Re-install the OS on the DUT through its servo.

    The sequence is:
    1) install a recovery image through the servo
       (`install_recovery_image`) and wait for the DUT to come up,
    2) run `chromeos-install --yes` on the DUT,
    3) long-press power, switch the USB key off, power the DUT back
       on and wait for it to come up again.

    @param image_url: Passed through to the servo's
                      `install_recovery_image()`.  Per the original
                      notes, when omitted the image already staged on
                      the USB stick is booted.

    @raises AutoservRepairFailure if the DUT does not come up after the
            recovery image is installed.
    @raises AutoservError if the DUT does not come up after the
            installed image is booted.
    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        usb_failure = ('DUT failed to boot from USB after %d seconds' %
                       self.USB_BOOT_TIMEOUT)
        raise error.AutoservRepairFailure(usb_failure)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()
    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        boot_failure = ('DUT failed to reboot installed '
                        'test image after %d seconds' %
                        self.BOOT_TIMEOUT)
        raise error.AutoservError(boot_failure)
758
759
def _servo_repair_reinstall(self):
    """Reinstall the DUT using its servo and a freshly staged image.

    Stages an image with `stage_image_for_servo()` and hands the
    resulting URL to `servo_install()`, which performs the actual
    re-installation.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_url = self.stage_image_for_servo()
    self.servo_install(staged_url)
782
783
def _servo_repair_power(self):
    """Attempt to repair the DUT by power cycling it through servo.

    Uses the servo's power state controller to power the DUT off and
    then on again, and waits up to BOOT_TIMEOUT for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    # The controller is fetched separately for each action, as the
    # two calls are independent requests to the servo.
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()
    came_back = self.wait_up(self.BOOT_TIMEOUT)
    if not came_back:
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800805
806
def _powercycle_to_repair(self):
    """Use the RPM infrastructure to bring the host back up.

    The host is power cycled and then given BOOT_TIMEOUT seconds to
    come up.  If it does not, the cycle is repeated until the number
    of failed cycles reaches _MAX_POWER_CYCLE_ATTEMPTS.  Per the
    original notes, repeated power cycling relies on the DUT's
    automatic fallback to the last good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host is still down after the
            maximum number of power cycles.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
837
838
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This implementation does not call the parent class.  It tries the
    following repair procedures in order, calling `self.verify()`
    after each one, and returns as soon as a procedure and its
    verification both succeed:
      1. Re-install to a known stable image using auto-update.
      2. Power the DUT off and on through servo.
      3. Re-install through servo.
      4. Power-cycle the DUT via RPM.

    A procedure that is not applicable or fails, or a failing
    `verify()`, is logged as a warning; its message is collected and
    the next procedure is tried.

    @raises AutoservRepairTotalFailure if no procedure repaired the
            DUT; the message lists every collected failure.
    """
    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    strategies = (
        self._install_repair,
        self._servo_repair_power,
        self._servo_repair_reinstall,
        self._powercycle_to_repair,
    )
    failure_messages = []
    for strategy in strategies:
        try:
            strategy()
            self.verify()
        except Exception as failure:
            logging.warning('Failed to repair device: %s', failure)
            failure_messages.append(str(failure))
        else:
            return

    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failure_messages))
Richard Barnette82c35912012-11-20 10:09:10 -0800884
885
def close(self):
    """Disconnect all RPC proxies, then close the host via the parent."""
    self.rpc_disconnect_all()
    parent = super(SiteHost, self)
    parent.close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700889
890
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE knows this host and the host carries
    the _RPM_OUTLET_CHANGED attribute.  Otherwise the host is powered
    on and the attribute is reset to None in the AFE.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    # The attribute is cleared whether or not powering on succeeded.
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
909
910
def cleanup(self):
    """Clean up the host after a job.

    Removes the CLEANUP_LOGS_PAUSED_FILE marker, then tries to reset
    the login state by restarting the UI.  If that fails with an
    Autotest or Autoserv run error, the parent class cleanup is used
    instead.  Finally, if the host has remote power, makes sure the
    host's power is restored.
    """
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700928
929
def reboot(self, **dargs):
    """Reboot the site host with Chrome OS specific defaults.

    Fills in `reboot_cmd` and `fastsync` when the caller did not
    supply them, then defers to the parent class `reboot()`.

    Per the original notes, the more generic RemoteHost.reboot()
    performs sync and sleeps for 5 seconds, which is not necessary
    for Chrome OS devices as the sync should finish quickly during
    the reboot command.

    @param dargs Keyword arguments forwarded to the parent `reboot()`.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
945
946
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Checks, in order:
      1. Everything checked by the parent version of this function.
      2. Sufficient space in /mnt/stateful_partition.
      3. Sufficient space in /mnt/stateful_partition/encrypted.
      4. `update_engine_client --status` runs successfully.
      5. python on the host can import cPickle.

    The required disk space values are read from the SERVER section
    of the global config, with defaults of 20.0 and 0.1 respectively.

    """
    super(SiteHost, self).verify_software()

    # (path, config key, default) for each disk space check.
    space_requirements = (
        ('/mnt/stateful_partition',
         'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for path, config_key, default_gb in space_requirements:
        required_gb = global_config.global_config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(path, required_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700974
975
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line with options tuned for Chrome OS.

    Fixed options and the rationale recorded for them:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null;
        we don't care about the keys.  Host keys change with every
        new installation, don't waste memory/space saving them.

      - BatchMode=yes.

      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection
        every 180 seconds.  In conjunction with ServerAliveCountMax
        ensures that if the connection dies, Autotest will bail out
        quickly.  Originally tried 60 secs, but saw frequent job
        ABORTS where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - Protocol=2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command line as a string, without a target host.
    """
    fixed_options = ' '.join([
        '-o StrictHostKeyChecking=no',
        '-o UserKnownHostsFile=/dev/null',
        '-o BatchMode=yes',
        '-o ConnectTimeout=30',
        '-o ServerAliveInterval=180',
        '-o ServerAliveCountMax=3',
        '-o ConnectionAttempts=4',
        '-o Protocol=2',
    ])
    template = '/usr/bin/ssh -a -x %s %s %s %s -l %s -p %d'
    return template % (self._ssh_verbosity_flag, self._ssh_options,
                       opts, fixed_options, user, port)
Fang Deng96667ca2013-08-01 17:46:18 -07001016
1017
def _create_ssh_tunnel(self, port, local_port):
    """Start an ssh process forwarding local_port to port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process (a `subprocess.Popen` object).
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    full_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding_opts),
                          self.hostname)
    logging.debug('Full tunnel command: %s', full_cmd)
    proc = subprocess.Popen(full_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d remote = %d, pid = %d',
                  local_port, port, proc.pid)
    return proc
1040
1041
def _setup_rpc(self, port, command_name):
    """Set up a tunnel process and record it for the given remote port.

    Any connection previously recorded for `port` is disconnected
    first, so only one proxy per remote port exists at a time.  A new
    ssh tunnel is then created from an unused local port, and stored
    in `_rpc_proxy_map` together with `command_name`.

    Consequences noted by the original author: xmlrpc and jsonrpc are
    assumed never to share a remote port.  If a newer proxy is set up
    for a port an older proxy was using, the older tunnel process is
    terminated, and later calls on the older client fail with
    connection closed errors.  An xmlrpc proxy set up on top of a
    jsonrpc server would not work either, and cannot terminate the
    jsonrpc server (goofy, tied to the factory image); a failed call
    on the client is considered easier to handle than terminating
    goofy, which might leave the DUT in a hard to recover state.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.

    @return A url that we can use to initiate the rpc connection.
    """
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    self._rpc_proxy_map[port] = (command_name, tunnel)
    rpc_url = self._RPC_PROXY_URL % forwarded_port
    return rpc_url
1077
1078
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Start an XMLRPC server on the host and return a client for it.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Creating the XMLRPC client does not by itself contact the
    remote server.  Unless `ready_test_name` is given, the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    When `ready_test_name` is given, it names a method on the proxy
    that takes no parameters and returns successfully only when the
    server is ready.  That method is called with retries for up to
    `timeout_seconds`; if it never succeeds the connection is torn
    down with `rpc_disconnect()` and the failure propagates.
    NOTE(review): the original notes name the propagated error both
    TestError and TestFail - confirm against retry.retry.

    Any connection previously set up for `port` is disconnected
    first (see `_setup_rpc()` / `rpc_disconnect()`).

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
                        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
                           method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
                           for the server to become 'ready.'

    @returns The XMLRPC client proxy.

    """
    rpc_url = self._setup_rpc(port, command_name)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    launch_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(launch_cmd).stdout.rstrip('\n')
    except Exception:
        # The server never started; drop the tunnel set up above.
        self.rpc_disconnect(port)
        raise

    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        retriable_errors = (socket.error,
                            xmlrpclib.ProtocolError,
                            httplib.BadStatusLine)
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        retry_delay = min(max(timeout_seconds/20.0, 0.1), 1)

        @retry.retry(retriable_errors,
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=retry_delay)
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            # Runs for any failure of the readiness probe, whatever
            # the exception type; the exception keeps propagating.
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001159
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001160
def jsonrpc_connect(self, port):
    """Create a jsonrpc proxy connection through an ssh tunnel.

    Per the original notes, this exists to talk to goofy (the default
    system manager on factory images), which is already running on
    the target; unlike xmlrpc_connect, no remote server is started
    here and sanity checking of the server is left to the caller.

    Any older tunnel recorded for `port` is disconnected before the
    new one is created (via `_setup_rpc()`), so that tunnel processes
    are not leaked.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # Resolve the proxy class before setting up the tunnel.
    proxy_class = jsonrpclib.jsonrpc.ServerProxy
    rpc_url = self._setup_rpc(port, None)
    proxy = proxy_class(rpc_url)

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1191
1192
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    If a process name was recorded for `port`, the matching remote
    process is killed with `pkill -f`.  The local ssh tunnel for the
    connection is terminated if it is still running, and the entry
    for `port` is removed from the proxy map.  Previously returned
    RPC client objects are not touched directly, but their calls
    will fail once the tunnel is gone.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    """
    proxy_map = self._rpc_proxy_map
    if port not in proxy_map:
        return
    remote_name, tunnel_proc = proxy_map[port]
    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    still_running = tunnel_proc.poll() is None
    if still_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001233
1234
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports.

    Calls `rpc_disconnect()` for every port currently present in the
    proxy map.
    """
    # rpc_disconnect() deletes entries from the map, so iterate over
    # an explicit snapshot of the ports rather than over the map
    # itself (or a live view of its keys).
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001239
1240
def _ping_check_status(self, status):
    """Ping the host once, and report whether it has a given status.

    The host counts as responding when `utils.ping()` returns 0.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_exit_code = utils.ping(self.hostname, tries=1, deadline=1)
    host_responded = (ping_exit_code == 0)
    # XOR is true exactly when the two values disagree.
    return not (status ^ host_responded)
1251
1252 def _ping_wait_for_status(self, status, timeout):
1253 """Wait for the host to have a given status (UP or DOWN).
1254
1255 Status is checked by polling. Polling will not last longer
1256 than the number of seconds in `timeout`. The polling
1257 interval will be long enough that only approximately
1258 _PING_WAIT_COUNT polling cycles will be executed, subject
1259 to a maximum interval of about one minute.
1260
1261 @param status Waiting will stop immediately if `ping` of the
1262 host returns this status.
1263 @param timeout Poll for at most this many seconds.
1264 @return True iff the host status from `ping` matched the
1265 requested status at the time of return.
1266
1267 """
1268 # _ping_check_status() takes about 1 second, hence the
1269 # "- 1" in the formula below.
1270 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1271 end_time = time.time() + timeout
1272 while time.time() <= end_time:
1273 if self._ping_check_status(status):
1274 return True
1275 if poll_interval > 0:
1276 time.sleep(poll_interval)
1277
1278 # The last thing we did was sleep(poll_interval), so it may
1279 # have been too long since the last `ping`. Check one more
1280 # time, just to be sure.
1281 return self._ping_check_status(status)
1282
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a dependable replacement for `wait_up()`: a
    host that answers ping does not necessarily answer ssh.  Use it
    only when the target DUT can be considered functional even
    though it can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the host
                   to be non-responsive.
    @return True iff the host answered ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001298
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster variant of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001312
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    A sleeping system cannot be told apart from one that is powered
    off by this check alone; to confirm that the unit really was
    asleep, force a resume and then call `test_wait_for_resume()`.

    A test is expected to use this function in a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within the
                        allowed time (SLEEP_TIMEOUT seconds).
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001339
1340
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host went to sleep.  If the boot id seen after the
    host comes back differs from it, the host rebooted instead of
    resuming and a `TestFail` exception is raised.

    See @ref test_wait_for_sleep for more on this function's usage.

    @param old_boot_id A boot id value obtained before the target
                       host went to sleep.

    @exception TestFail The host did not respond within the allowed
                        time (RESUME_TIMEOUT seconds).
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host is reachable again; a changed boot id means it went
    # through a reboot, not a resume.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, current_boot_id))
1371
1372
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    A system that has shut down cannot be told apart from one that
    is merely asleep by this check alone; to confirm that the unit
    really was down, force a boot and then call
    test_wait_for_boot().

    A test is expected to use this function in a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time (SHUTDOWN_TIMEOUT seconds).
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001398
1399
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host was shut down.  If the boot id seen after the
    host comes back is unchanged, a `TestFail` exception is raised.
    When `old_boot_id` is not given (or is otherwise falsy) the boot
    id comparison is skipped.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the shut
                       down.

    @exception TestFail The host did not respond within the allowed
                        time (REBOOT_TIMEOUT seconds).
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    if not old_boot_id:
        # Nothing to compare against; being reachable is enough.
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001429
1430
@staticmethod
def check_for_rpm_support(hostname):
    """Report whether a hostname follows the RPM naming format.

    @param hostname The hostname to test against
                    `SiteHost._RPM_HOSTNAME_REGEX`.
    @return None if the hostname does not follow the defined naming
            format for RPM powered DUT's in the lab.  If it does
            follow the format, the regular expression MatchObject
            is returned instead.
    """
    rpm_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001440
1441
def has_power(self):
    """Report whether this host is powered by an RPM.

    The answer is based purely on this host's hostname, as judged
    by `check_for_rpm_support()`.

    @return The result of `check_for_rpm_support()` for this host:
            a (truthy) regular expression MatchObject if the
            hostname follows the defined RPM naming format, or None
            if it does not.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
1449
1450
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001451 def _set_power(self, state, power_method):
1452 """Sets the power to the host via RPM, Servo or manual.
1453
1454 @param state Specifies which power state to set to DUT
1455 @param power_method Specifies which method of power control to
1456 use. By default "RPM" will be used. Valid values
1457 are the strings "RPM", "manual", "servoj10".
1458
1459 """
1460 ACCEPTABLE_STATES = ['ON', 'OFF']
1461
1462 if state.upper() not in ACCEPTABLE_STATES:
1463 raise error.TestError('State must be one of: %s.'
1464 % (ACCEPTABLE_STATES,))
1465
1466 if power_method == self.POWER_CONTROL_SERVO:
1467 logging.info('Setting servo port J10 to %s', state)
1468 self.servo.set('prtctl3_pwren', state.lower())
1469 time.sleep(self._USB_POWER_TIMEOUT)
1470 elif power_method == self.POWER_CONTROL_MANUAL:
1471 logging.info('You have %d seconds to set the AC power to %s.',
1472 self._POWER_CYCLE_TIMEOUT, state)
1473 time.sleep(self._POWER_CYCLE_TIMEOUT)
1474 else:
1475 if not self.has_power():
1476 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001477 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1478 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1479 hostname=self.hostname)
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001480 rpm_client.set_power(self.hostname, state.upper())
Simran Basid5e5e272012-09-24 15:23:59 -07001481
1482
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Switch off power to this host via RPM, Servo or manual.

    @param power_method Which method of power control to use.  By
                        default "RPM" will be used.  Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001492
1493
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Switch on power to this host via RPM, Servo or manual.

    @param power_method Which method of power control to use.  By
                        default "RPM" will be used.  Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1503
1504
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the servo and manual methods the cycle is performed as
    power_off(), a pause of _POWER_CYCLE_TIMEOUT seconds, then
    power_on().  For any other method a single 'CYCLE' request is
    sent to the RPM.

    @param power_method Which method of power control to use.  By
                        default "RPM" will be used.  Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001520
1521
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id (fwid) reported by
    crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format
    # {platform}.{firmware version}, for example Alex.X.YYY.Z or
    # Google_Alex.X.YYY.Z; the part before the first '.' is used as
    # the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.', 1)[0].lower()
    # Newer platforms start with 'Google_' while the older ones do
    # not, so that marker is dropped from the id.
    return platform_id.replace('google_', '')
1535
1536
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in
    /etc/lsb-release on the host and prefixed with
    ds_constants.BOARD_PREFIX.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    # Devices in the lab generally have the correct board name but
    # our own development devices have
    # {board_name}-signed-{key_type}, so only the leading part is
    # kept.  Names containing 'x86' (e.g. an 'x86-' prefix) keep
    # their first two dash-separated parts.
    parts = board.split('-')
    kept_parts = parts[:2] if 'x86' in board else parts[:1]
    return label_format % '-'.join(kept_parts)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001553
1554
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Searches _LIGHTSENSOR_SEARCH_DIR on the host (following
    symlinks, at most 4 levels deep) for any of the names listed in
    _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor
             or None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is set to None
        # because there can be a symlink loop; the command still
        # executes correctly, with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
1574
1575
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks on the host whether the directory
    /sys/class/bluetooth/hci0 exists.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code of 1, meaning the directory
        # did not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1591
1592
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the correct graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    # Look only at the machine hardware name.  The full `uname -a`
    # output also carries the node name and the kernel build string,
    # either of which can contain the letters 'arm' on a non-ARM
    # host and would then produce the wrong label.
    machine = self.run('uname -m').stdout.strip().lower()
    # 32-bit ARM reports names such as 'armv7l'; 64-bit ARM reports
    # 'aarch64'.
    if machine.startswith(('arm', 'aarch64')):
        return 'graphics:gles'
    return 'graphics:gl'
1608
1609
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in _LABEL_FUNCTIONS is called with this
    host, in order, and each truthy result is collected.

    @returns a list of the labels produced by the label functions;
             results that are None (or otherwise falsy) are left
             out.
    """
    # The generator calls each label function exactly once, in
    # order, as the list is built.
    candidates = (label_function(self)
                  for label_function in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]