blob: 063d341851e6959a512067a12933ea54b41b2ffe [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080028from autotest_lib.server.cros.servo import servo
Fang Deng96667ca2013-08-01 17:46:18 -070029from autotest_lib.server.hosts import abstract_ssh
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
beeps32a63082013-08-22 14:02:29 -070034try:
35 import jsonrpclib
36except ImportError:
37 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070038
Fang Dengd1c2b732013-08-20 12:59:46 -070039
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080040def _make_servo_hostname(hostname):
41 host_parts = hostname.split('.')
42 host_parts[0] = host_parts[0] + '-servo'
43 return '.'.join(host_parts)
44
45
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    This is best-effort: if the derived servo host is not in the
    lab zone, or the Servo object cannot be created, `None` is
    returned rather than raising.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None`.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Only Exception is caught so that KeyboardInterrupt and
        # SystemExit still propagate; the failure is logged so that
        # a missing servo can be diagnosed from the debug log.
        logging.debug('Ignoring failure to connect to lab servo %s',
                      servo_host, exc_info=True)
        return None
68
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080069
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label detection functions.

    The returned decorator appends the decorated function to
    `label_function_list` and hands the function back unchanged.  When
    both a non-empty `label` and a `label_list` were supplied, `label`
    is appended to `label_list` as well.

    @param label_function_list: List of label detecting functions to add
                                the decorated function to.
    @param label_list: List of detectable labels to add `label` to.
                       (Default: None)
    @param label: Label string that is detectable by the decorated
                  function. (Default: None)
    @return the registering decorator.
    """
    def register_detector(detector):
        """
        @param detector: The function to be added as a detector.
        @return `detector` itself.
        """
        label_function_list.append(detector)
        if label_list is not None and label:
            label_list.append(label)
        return detector

    return register_detector
88
89
Fang Deng96667ca2013-08-01 17:46:18 -070090class SiteHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070091 """Chromium OS specific subclass of Host."""
92
93 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050094 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070095
Richard Barnette0c73ffc2012-11-19 15:21:18 -080096 # Time to wait for new kernel to be marked successful after
97 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -070098 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -070099
Richard Barnette03a0c132012-11-05 12:40:35 -0800100 # Timeout values (in seconds) associated with various Chrome OS
101 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700102 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800103 # In general, a good rule of thumb is that the timeout can be up
104 # to twice the typical measured value on the slowest platform.
105 # The times here have not necessarily been empirically tested to
106 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700107 #
108 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800109 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
110 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700111 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800112 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800113 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700114 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800115 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800116 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800117 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700118 # REBOOT_TIMEOUT: How long to wait for a reboot.
Richard Barnette03a0c132012-11-05 12:40:35 -0800119 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700120
121 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800122 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700123 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700124 USB_BOOT_TIMEOUT = 150
Chris Sosab76e0ee2013-05-22 16:55:41 -0700125
126 # We have a long timeout to ensure we don't flakily fail due to other
127 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
128 REBOOT_TIMEOUT = 300
129
Richard Barnette03a0c132012-11-05 12:40:35 -0800130 _INSTALL_TIMEOUT = 240
131
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800132 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
133 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
134 _USB_POWER_TIMEOUT = 5
135 _POWER_CYCLE_TIMEOUT = 10
136
beeps32a63082013-08-22 14:02:29 -0700137 _RPC_PROXY_URL = 'http://localhost:%d'
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800138
Richard Barnette82c35912012-11-20 10:09:10 -0800139 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
140 'rpm_recovery_boards', type=str).split(',')
141
142 _MAX_POWER_CYCLE_ATTEMPTS = 6
143 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
144 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
145 'host[0-9]+')
146 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
147 'in_illuminance0_raw',
148 'illuminance0_input']
149 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
150 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800151 _DETECTABLE_LABELS = []
152 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
153 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700154
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800155 # Constants used in ping_wait_up() and ping_wait_down().
156 #
157 # _PING_WAIT_COUNT is the approximate number of polling
158 # cycles to use when waiting for a host state change.
159 #
160 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
161 # for arguments to the internal _ping_wait_for_status()
162 # method.
163 _PING_WAIT_COUNT = 40
164 _PING_STATUS_DOWN = False
165 _PING_STATUS_UP = True
166
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800167 # Allowed values for the power_method argument.
168
169 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
170 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
171 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
172 POWER_CONTROL_RPM = 'RPM'
173 POWER_CONTROL_SERVO = 'servoj10'
174 POWER_CONTROL_MANUAL = 'manual'
175
176 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
177 POWER_CONTROL_SERVO,
178 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800179
Simran Basi5e6339a2013-03-21 11:34:32 -0700180 _RPM_OUTLET_CHANGED = 'outlet_changed'
181
beeps687243d2013-07-18 15:29:27 -0700182
@staticmethod
def get_servo_arguments(args_dict):
    """Extract servo options from `args_dict` and return the result.

    Only the 'servo_host' and 'servo_port' entries are copied; any
    other keys in `args_dict` are ignored, and a key that is absent
    from `args_dict` is absent from the result.  The intent is to
    provide standard argument processing from run_remote_tests for
    tests that require a servo to operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.
    @return a new dictionary holding just the servo arguments.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700208
J. Richard Barnette964fba02012-10-24 17:34:29 -0700209
def _initialize(self, hostname, servo_args=None, ssh_verbosity_flag='',
                *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    The servo object is chosen as follows: first a lab servo is
    looked up for `hostname` via `_get_lab_servo()`.  If that yields
    nothing and the caller passed `servo_args` (settings such as
    `servo_host` and/or `servo_port` from a control file that
    requires servo features), a Servo is built from those arguments.
    Otherwise `self.servo` stays `None`.

    @param hostname: host name of the DUT.
    @param servo_args: keyword arguments for `servo.Servo()`, or
                       `None` if no servo was requested.
    @param ssh_verbosity_flag: stored as `self._ssh_verbosity_flag`.
    @param args: positional arguments forwarded to the superclass.
    @param dargs: keyword arguments forwarded to the superclass.
    """
    super(SiteHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)

    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'

    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag

    # Prefer the lab servo; fall back to the caller's settings.
    self.servo = _get_lab_servo(hostname)
    if servo_args is not None and not self.servo:
        self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700235
236
def get_repair_image_name(self):
    """Generate the repair image name from the global config.

    Reads `stable_cros_version` and `stable_build_pattern` from the
    CROS section of the global config and fills the pattern with
    this host's board and the stable version.

    @returns the string `stable_build_pattern % (board, version)`.
    @raises error.AutoservError if no board is known for the host.

    """
    config = global_config.global_config
    stable_version = config.get_config_value('CROS', 'stable_cros_version')
    build_pattern = config.get_config_value('CROS', 'stable_build_pattern')

    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)

    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
252
253
Scott Zawalski62bacae2013-03-05 10:40:32 -0500254 def _host_in_AFE(self):
255 """Check if the host is an object the AFE knows.
256
257 @returns the host object.
258 """
259 return self._AFE.get_hosts(hostname=self.hostname)
260
261
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    @returns job_repo_url from AFE, or None if the AFE does not know
             the host or the host has no job_repo_url attribute.
    """
    # _host_in_AFE() already returns the host list from the AFE, so
    # reuse it instead of issuing a second, identical get_hosts RPC.
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700275
276
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise
    the host is removed from every label whose name starts with the
    version prefix, and job_repo_url is reset through
    `update_job_repo_url(None, None)`.
    """
    if self._host_in_AFE():
        this_host = [self.hostname]
        version_labels = self._AFE.get_labels(
                name__startswith=ds_constants.VERSION_PREFIX,
                host__hostname=self.hostname)
        for version_label in version_labels:
            version_label.remove_hosts(hosts=this_host)

        self.update_job_repo_url(None, None)
291
292
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    When either argument is empty the attribute is set to None.
    After the attribute is written it is read back through
    `lookup_job_repo_url()` and compared with the value just set.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        new_url = tools.get_package_url(devserver_url, image_name)
    else:
        new_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, new_url,
                                 hostname=self.hostname)

    stored_url = self.lookup_job_repo_url()
    if stored_url != new_url:
        raise error.AutoservError('Failed to update job_repo_url with %s, '
                                  'host %s' % (new_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500310
311
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise a
    devserver is resolved for `image_name`, the host is added to the
    version label for the image (the label is created if the AFE has
    none by that name), and job_repo_url is updated to match.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing_labels = self._AFE.get_labels(name=cros_label)
    label = (existing_labels[0] if existing_labels
             else self._AFE.create_label(name=cros_label))
    label.add_hosts([self.hostname])

    self.update_job_repo_url(devserver_url, image_name)
333
334
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0.  The devserver
    and build are parsed out of the host's job_repo_url and the
    autotest artifacts for that build are staged on that devserver.
    The time spent staging is reported as a 'verify_job_repo_url'
    gauge, keyed by board, build type, branch and devserver, whenever
    the build name can be parsed.

    If the host has no job_repo_url a warning is logged and nothing
    else happens.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Not used by the code in this method.

    @raises DevServerException: If we could not resolve a devserver.
    @raises AutoservError: If we're unable to save the new job_repo_url as
        a result of choosing a new devserver because the old one failed to
        respond to a health check.
    @raises urllib2.URLError: If the devserver embedded in job_repo_url
                              doesn't respond within the timeout.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
            job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    staging_began = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - staging_began

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = site_utils.ParseBuildName(
                image_name)[:3]
    except site_utils.ParseBuildNameException:
        # Without a parseable build name there is no stats key to
        # report under, so the gauge is simply skipped.
        return

    # The devserver name is the text between the '//' that follows
    # the first '/' of the URL and the last ':' (the port separator).
    host_start = devserver_url.find('/') + 2
    host_end = devserver_url.rfind(':')
    devserver = devserver_url[host_start:host_end]

    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        'devserver': devserver.replace('.', '_'),
    }
    gauge_name = (
        '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key)
    stats.Gauge('verify_job_repo_url').send(gauge_name, stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700396
Scott Zawalskieadbf702013-03-14 09:23:06 -0400397
Dan Shi0f466e82013-02-22 15:44:58 -0800398 def _try_stateful_update(self, update_url, force_update, updater):
399 """Try to use stateful update to initialize DUT.
400
401 When DUT is already running the same version that machine_install
402 tries to install, stateful update is a much faster way to clean up
403 the DUT for testing, compared to a full reimage. It is implemeted
404 by calling autoupdater.run_update, but skipping updating root, as
405 updating the kernel is time consuming and not necessary.
406
407 @param update_url: url of the image.
408 @param force_update: Set to True to update the image even if the DUT
409 is running the same version.
410 @param updater: ChromiumOSUpdater instance used to update the DUT.
411 @returns: True if the DUT was updated with stateful update.
412
413 """
414 if not updater.check_version():
415 return False
416 if not force_update:
417 logging.info('Canceling stateful update because the new and '
418 'old versions are the same.')
419 return False
420 # Following folders should be rebuilt after stateful update.
421 # A test file is used to confirm each folder gets rebuilt after
422 # the stateful update.
423 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
424 test_file = '.test_file_to_be_deleted'
425 for folder in folders_to_check:
426 touch_path = os.path.join(folder, test_file)
427 self.run('touch %s' % touch_path)
428
429 if not updater.run_update(force_update=True, update_root=False):
430 return False
431
432 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700433 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800434 check_file_cmd = 'test -f %s; echo $?'
435 for folder in folders_to_check:
436 test_file_path = os.path.join(folder, test_file)
437 result = self.run(check_file_cmd % test_file_path,
438 ignore_status=True)
439 if result.exit_status == 1:
440 return False
441 return True
442
443
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    The checks run in this order: the lab machine marker is written
    and autoreboot is started, the active kernel is compared with
    `expected_kernel` (if given), the installed build is confirmed
    through the updater, and finally the active kernel is polled
    until it has no tries left and is marked successful.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.
    @raises autoupdater.ChromiumOSError if any of the checks fails.

    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # Figure out the newly active kernel.
    active_kernel, _ = updater.get_kernel_state()

    # Check for rollback due to a bad build.
    rolled_back = expected_kernel and active_kernel != expected_kernel
    if rolled_back:
        # Print out some information to make it easier to debug
        # the rollback.
        logging.debug('Dumping partition table.')
        self.run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self.run('crossystem --all')
        raise autoupdater.ChromiumOSError(
                'Build %s failed to boot on %s; system rolled back '
                'to previous build' % (updater.update_version,
                                       self.hostname))

    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        raise autoupdater.ChromiumOSError(
                'Failed to update %s to build %s; found build '
                '%s instead' % (self.hostname,
                                updater.update_version,
                                updater.get_build_id()))

    def kernel_marked_good():
        """@return a true value once the active kernel is marked good."""
        return (updater.get_kernel_tries(active_kernel) == 0
                and updater.get_kernel_success(active_kernel))

    # Make sure chromeos-setgoodkernel runs.
    try:
        utils.poll_for_condition(
                kernel_marked_good,
                exception=autoupdater.ChromiumOSError(),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except autoupdater.ChromiumOSError:
        # Use the state of system-services to choose which failure
        # to report.
        services_status = self.run('status system-services').stdout
        if services_status == 'system-services start/running\n':
            event = ('update-engine failed to call '
                     'chromeos-setgoodkernel')
        else:
            event = ('Chrome failed to reach login screen')
        raise autoupdater.ChromiumOSError(
                'After update and reboot, %s '
                'within %d seconds' % (event,
                                       self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800501
502
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    When no image name is given, the repair image name from
    `get_repair_image_name()` is used.  The download is triggered
    with `synchronous=False`.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name if image_name else self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)

    devserver = dev_server.ImageServer.resolve(build)
    devserver.trigger_download(build, synchronous=False)

    url_pattern = tools.image_url_pattern()
    return url_pattern % (devserver.url(), build)
516
517
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    When no image name is given, the repair image name from
    `get_repair_image_name()` is used.  The 'test_image' artifact
    of the build is staged on the resolved devserver.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns the URL reported by the devserver's
        `get_test_image_url()` for the staged build.
    """
    build = image_name if image_name else self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)

    devserver = dev_server.ImageServer.resolve(build)
    devserver.stage_artifacts(build, ['test_image'])
    return devserver.get_test_image_url(build)
531
532
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory image on a devserver and return its URL.

    The 'factory_image' artifact is staged from the canary channel
    server onto the devserver resolved for `image_name`.

    @param image_name: a name like <board>/4262.204.0
    @return: An update URL, eg:
        http://<devserver>/static/canary-channel/\
        <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
        or `None` (after logging an error) when `image_name` is empty.
    """
    if image_name:
        logging.info('Staging build for servo install: %s', image_name)
        devserver = dev_server.ImageServer.resolve(image_name)
        canary_server = dev_server._get_canary_channel_server()
        devserver.stage_artifacts(
                image_name,
                ['factory_image'],
                archive_url=canary_server)

        url_pattern = tools.factory_image_url_pattern()
        return url_pattern % (devserver.url(), image_name)

    logging.error('Need an image_name to stage a factory image.')
    return None
553
554
Chris Sosaa3ac2152012-05-23 22:23:13 -0700555 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500556 local_devserver=False, repair=False):
557 """Install the DUT.
558
Dan Shi0f466e82013-02-22 15:44:58 -0800559 Use stateful update if the DUT is already running the same build.
560 Stateful update does not update kernel and tends to run much faster
561 than a full reimage. If the DUT is running a different build, or it
562 failed to do a stateful update, full update, including kernel update,
563 will be applied to the DUT.
564
Scott Zawalskieadbf702013-03-14 09:23:06 -0400565 Once a host enters machine_install its cros_version label will be
566 removed as well as its host attribute job_repo_url (used for
567 package install).
568
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500569 @param update_url: The url to use for the update
570 pattern: http://$devserver:###/update/$build
571 If update_url is None and repair is True we will install the
572 stable image listed in global_config under
573 CROS.stable_cros_version.
574 @param force_update: Force an update even if the version installed
575 is the same. Default:False
576 @param local_devserver: Used by run_remote_test to allow people to
577 use their local devserver. Default: False
578 @param repair: Whether or not we are in repair mode. This adds special
579 cases for repairing a machine like starting update_engine.
580 Setting repair to True sets force_update to True as well.
581 default: False
582 @raises autoupdater.ChromiumOSError
583
584 """
Dan Shi7458bf62013-06-10 12:50:16 -0700585 if update_url:
586 logging.debug('update url is set to %s', update_url)
587 else:
588 logging.debug('update url is not set, resolving...')
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700589 if self._parser.options.image:
590 requested_build = self._parser.options.image
591 if requested_build.startswith('http://'):
592 update_url = requested_build
Dan Shi7458bf62013-06-10 12:50:16 -0700593 logging.debug('update url is retrieved from requested_build'
594 ': %s', update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700595 else:
596 # Try to stage any build that does not start with
597 # http:// on the devservers defined in
598 # global_config.ini.
Dan Shi7458bf62013-06-10 12:50:16 -0700599 update_url = self._stage_image_for_update(requested_build)
600 logging.debug('Build staged, and update_url is set to: %s',
601 update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700602 elif repair:
603 update_url = self._stage_image_for_update()
Dan Shi7458bf62013-06-10 12:50:16 -0700604 logging.debug('Build staged, and update_url is set to: %s',
605 update_url)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400606 else:
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700607 raise autoupdater.ChromiumOSError(
608 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500609
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500610 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800611 # In case the system is in a bad state, we always reboot the machine
612 # before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700613 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500614 self.run('stop update-engine; start update-engine')
615 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800616
Chris Sosaa3ac2152012-05-23 22:23:13 -0700617 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Chris Sosa72312602013-04-16 15:01:56 -0700618 local_devserver=local_devserver)
Dan Shi0f466e82013-02-22 15:44:58 -0800619 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400620 # Remove cros-version and job_repo_url host attribute from host.
621 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800622 # If the DUT is already running the same build, try stateful update
623 # first. Stateful update does not update kernel and tends to run much
624 # faster than a full reimage.
625 try:
Chris Sosab76e0ee2013-05-22 16:55:41 -0700626 updated = self._try_stateful_update(
627 update_url, force_update, updater)
Dan Shi0f466e82013-02-22 15:44:58 -0800628 if updated:
629 logging.info('DUT is updated with stateful update.')
630 except Exception as e:
631 logging.exception(e)
632 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700633
Dan Shi0f466e82013-02-22 15:44:58 -0800634 inactive_kernel = None
635 # Do a full update if stateful update is not applicable or failed.
636 if not updated:
637 # In case the system is in a bad state, we always reboot the
638 # machine before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700639 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Chris Sosab7612bc2013-03-21 10:32:37 -0700640
641 # TODO(sosa): Remove temporary hack to get rid of bricked machines
642 # that can't update due to a corrupted policy.
643 self.run('rm -rf /var/lib/whitelist')
644 self.run('touch /var/lib/whitelist')
645 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400646 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700647
Dan Shi0f466e82013-02-22 15:44:58 -0800648 if updater.run_update(force_update):
649 updated = True
650 # Figure out active and inactive kernel.
651 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700652
Dan Shi0f466e82013-02-22 15:44:58 -0800653 # Ensure inactive kernel has higher priority than active.
654 if (updater.get_kernel_priority(inactive_kernel)
655 < updater.get_kernel_priority(active_kernel)):
656 raise autoupdater.ChromiumOSError(
657 'Update failed. The priority of the inactive kernel'
658 ' partition is less than that of the active kernel'
659 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700660
Dan Shi0f466e82013-02-22 15:44:58 -0800661 # Updater has returned successfully; reboot the host.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700662 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700663
Dan Shi0f466e82013-02-22 15:44:58 -0800664 if updated:
665 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400666 image_name = autoupdater.url_to_image_name(update_url)
Dan Shie9309262013-06-19 22:50:21 -0700667 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800668
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700669 # Clean up any old autotest directories which may be lying around.
670 for path in global_config.global_config.get_config_value(
671 'AUTOSERV', 'client_autodir_paths', type=list):
672 self.run('rm -rf ' + path)
673
674
def show_update_engine_log(self):
    """Dump the update engine log of the host.

    Emits a debug message naming `constants.UPDATE_ENGINE_LOG`, then
    runs `cat` on that file on the host through `self.run()`.
    """
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
679
680
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Thin wrapper around `server_utils.get_board_from_afe()`, called
    with this host's name and the `self._AFE` handle.  As documented
    for this method, the lookup searches for a host label of the form
    "board:<board>" and yields the "<board>" part; `None` is returned
    if there is not a single, unique label matching the pattern.

    @returns board from label, or `None`.
    """
    lookup = server_utils.get_board_from_afe
    return lookup(self.hostname, self._AFE)
Simran Basi833814b2013-01-29 13:13:43 -0800691
692
def get_build(self):
    """Retrieve the current build for this Host from the AFE.

    Thin wrapper around `server_utils.get_build_from_afe()`, called
    with this host's name and the `self._AFE` handle.  As documented
    for this method, the build is determined from the host's labels
    in the AFE.

    @returns The current build or None if it could not find it or if
             there were multiple build labels assigned to this host.
    """
    lookup = server_utils.get_build_from_afe
    return lookup(self.hostname, self._AFE)
Richard Barnette82c35912012-11-20 10:09:10 -0800702
703
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500704 def _install_repair(self):
705 """Attempt to repair this host using upate-engine.
706
707 If the host is up, try installing the DUT with a stable
708 "repair" version of Chrome OS as defined in the global_config
709 under CROS.stable_cros_version.
710
Scott Zawalski62bacae2013-03-05 10:40:32 -0500711 @raises AutoservRepairMethodNA if the DUT is not reachable.
712 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500713
714 """
715 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500716 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500717
718 logging.info('Attempting to reimage machine to repair image.')
719 try:
720 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700721 except autoupdater.ChromiumOSError as e:
722 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500723 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500724 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500725
726
def servo_install(self, image_url=None):
    """Re-install the OS on the DUT through the attached servo.

    The sequence is:
    1) install a test image on a USB storage device attached to the
       Servo board,
    2) boot that image in recovery mode, and then
    3) install the image with chromeos-install and power the DUT back
       on from its internal storage.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.

    @raises AutoservRepairFailure if the DUT does not come up from USB
            within `USB_BOOT_TIMEOUT` seconds.
    @raises AutoservError if the DUT does not come up from the freshly
            installed image within `BOOT_TIMEOUT` seconds.
    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()
    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
756
757
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700758 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500759 """Reinstall the DUT utilizing servo and a test image.
760
761 Re-install the OS on the DUT by:
762 1) installing a test image on a USB storage device attached to the Servo
763 board,
764 2) booting that image in recovery mode, and then
765 3) installing the image with chromeos-install.
766
Scott Zawalski62bacae2013-03-05 10:40:32 -0500767 @raises AutoservRepairMethodNA if the device does not have servo
768 support.
769
770 """
771 if not self.servo:
772 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
773 'DUT has no servo support.')
774
775 logging.info('Attempting to recovery servo enabled device with '
776 'servo_repair_reinstall')
777
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700778 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500779 self.servo_install(image_url)
780
781
782 def _servo_repair_power(self):
783 """Attempt to repair DUT using an attached Servo.
784
785 Attempt to power on the DUT via power_long_press.
786
787 @raises AutoservRepairMethodNA if the device does not have servo
788 support.
789 @raises AutoservRepairFailure if the repair fails for any reason.
790 """
791 if not self.servo:
792 raise error.AutoservRepairMethodNA('Repair Power NA: '
793 'DUT has no servo support.')
794
795 logging.info('Attempting to recover servo enabled device by '
796 'powering it off and on.')
797 self.servo.get_power_state_controller().power_off()
798 self.servo.get_power_state_controller().power_on()
799 if self.wait_up(self.BOOT_TIMEOUT):
800 return
801
802 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800803
804
Richard Barnette82c35912012-11-20 10:09:10 -0800805 def _powercycle_to_repair(self):
806 """Utilize the RPM Infrastructure to bring the host back up.
807
808 If the host is not up/repaired after the first powercycle we utilize
809 auto fallback to the last good install by powercycling and rebooting the
810 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -0500811
812 @raises AutoservRepairMethodNA if the device does not support remote
813 power.
814 @raises AutoservRepairFailure if the repair fails for any reason.
815
Richard Barnette82c35912012-11-20 10:09:10 -0800816 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500817 if not self.has_power():
818 raise error.AutoservRepairMethodNA('Device does not support power.')
819
Richard Barnette82c35912012-11-20 10:09:10 -0800820 logging.info('Attempting repair via RPM powercycle.')
821 failed_cycles = 0
822 self.power_cycle()
823 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
824 failed_cycles += 1
825 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -0500826 raise error.AutoservRepairFailure(
827 'Powercycled host %s %d times; device did not come back'
828 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -0800829 self.power_cycle()
830 if failed_cycles == 0:
831 logging.info('Powercycling was successful first time.')
832 else:
833 logging.info('Powercycling was successful after %d failures.',
834 failed_cycles)
835
836
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted in order; each one is
    followed by `self.verify()`, and the first procedure whose
    verification passes ends the repair:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If there's a servo for the DUT, try to re-install via
         the servo.
      4. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Any exception from a procedure or from its verification is logged
    and its text collected; the next procedure is then tried.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT; the message lists every collected error.
    """
    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    strategies = (self._install_repair,
                  self._servo_repair_power,
                  self._servo_repair_reinstall,
                  self._powercycle_to_repair)
    failures = []
    for attempt in strategies:
        try:
            attempt()
            self.verify()
        except Exception as e:
            logging.warning('Failed to repair device: %s', e)
            failures.append(str(e))
        else:
            return

    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failures))
Richard Barnette82c35912012-11-20 10:09:10 -0800882
883
def close(self):
    """Close this host, releasing its RPC connections first.

    Every RPC proxy port tracked by this host is disconnected through
    `rpc_disconnect_all()` before the parent class `close()` runs.
    """
    self.rpc_disconnect_all()
    super(SiteHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700887
888
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Looks this host up in the AFE.  Nothing is done unless the host is
    found and carries the `_RPM_OUTLET_CHANGED` attribute.  Otherwise
    power is switched on, and the attribute is then reset to None; the
    reset also happens when switching power on failed with a
    RemotePowerException.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matches = afe.get_hosts(hostname=self.hostname)
    if not (matches and
            self._RPM_OUTLET_CHANGED in matches[0].attributes):
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
907
908
def cleanup(self):
    """Clean up the host between tests.

    Removes `constants.CLEANUP_LOGS_PAUSED_FILE`, then tries a light
    weight cleanup: clear the login prompt state, restart the UI and
    wait for the login prompt.  If that fails with an AutotestRunError
    or AutoservRunError, the parent class cleanup is used instead.
    Finally, for hosts with RPM power control, `_cleanup_poweron()` is
    called.
    """
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700926
927
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for
    5 seconds.  This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot command.

    Defaults are supplied for `reboot_cmd` and `fastsync` only when
    the caller did not pass them; everything is then forwarded to the
    parent class `reboot()`.

    @param dargs Keyword arguments for the parent class `reboot()`.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
943
944
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. `update_engine_client --status` runs successfully.
     5. python on the host is able to import cPickle.

    The space requirements are read from the SERVER section of the
    global config, with built-in defaults if they are not set.

    """
    super(SiteHost, self).verify_software()

    # (path to check, SERVER config key, default requirement)
    space_checks = (
        ('/mnt/stateful_partition',
         'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for mount_point, config_key, default_required in space_checks:
        required = global_config.global_config.get_config_value(
                'SERVER', config_key, type=float,
                default=default_required)
        self.check_diskspace(mount_point, required)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700972
973
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command line as a string, without the target host.
    """
    ssh_template = ('/usr/bin/ssh -a -x %s %s -o StrictHostKeyChecking=no'
                    ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                    ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                    ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                    ' -o Protocol=2 -l %s -p %d')
    template_args = (self._ssh_verbosity_flag, opts, user, port)
    return ssh_template % template_args
Fang Deng96667ca2013-08-01 17:46:18 -07001012
1013
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    The tunnel is an ssh process started through the shell, with the
    command line from `make_ssh_command()` plus port forwarding
    options and this host's name.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forward_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forward_opts),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel.pid)
    return tunnel
1036
1037
def _setup_rpc(self, port, command_name):
    """Sets up a tunnel process and performs rpc connection book keeping.

    Any connection already recorded for `port` is disconnected first.
    A new ssh tunnel is then started from an unused local port to
    `port`, and recorded in `self._rpc_proxy_map` together with
    `command_name`.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop whatever was previously tunneled to this remote port.
    self.rpc_disconnect(port)
    forwarding_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarding_port)
    self._rpc_proxy_map[port] = (command_name, tunnel)
    return self._RPC_PROXY_URL % forwarding_port
1073
1074
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready.  If the server isn't ready by timeout_seconds,
    the connection is torn down again and the exception from the
    retry wrapper propagates (the earlier documentation names both
    TestError and TestFail here; confirm against `retry.retry`).

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'

    @returns The XMLRPC client proxy for the server.

    """
    rpc_url = self._setup_rpc(port, command_name)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    launch_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        server_pid = self.run(launch_cmd).stdout.rstrip('\n')
    except Exception:
        # The tunnel is useless without a server behind it.
        self.rpc_disconnect(port)
        raise

    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, server_pid)

    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        retry_delay = min(max(timeout_seconds/20.0, 0.1), 1)

        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=retry_delay)
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            # Runs for every kind of failure, not only the retried ones.
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001155
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001156
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not facilitate the creation of a remote
    jsonrpc server, as the only clients of this code are factory tests,
    for which the goofy system manager is built in to the image and starts
    when the target boots.

    One can theoretically create multiple jsonrpc proxies all forwarded
    to the same remote port, provided the remote port has an rpc server
    listening. However, in doing so we stand the risk of leaking an
    existing tunnel process, so we always disconnect any older tunnels
    we might have through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is not available.
    """
    if jsonrpclib:
        proxy_factory = jsonrpclib.jsonrpc.ServerProxy
        # No remote process name is recorded: there is no server for
        # rpc_disconnect to kill later.
        proxy = proxy_factory(self._setup_rpc(port, None))

        logging.info('Established a jsonrpc connection through port %s.',
                     port)
        return proxy

    logging.warning('Jsonrpclib could not be imported. Check that '
                    'site-packages contains jsonrpclib.')
    return None
1187
1188
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    The bookkeeping entry for `port` is always removed and the local
    tunnel is always shut down, even when the command that kills the
    remote server raises; such an exception still propagates to the
    caller.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    """
    # Take the entry out of the map up front, so that it cannot go
    # stale if anything below raises.
    entry = self._rpc_proxy_map.pop(port, None)
    if entry is None:
        return
    remote_name, tunnel_proc = entry
    try:
        if remote_name:
            # We use 'pkill' to find our target process rather than
            # a PID, because the host may have rebooted since
            # connecting, and we don't want to kill an innocent
            # process with the same PID.
            #
            # 'pkill' helpfully exits with status 1 if no target
            # process is found, for which run() will throw an
            # exception.  We don't want that, so we ignore the
            # status.
            self.run("pkill -f '%s'" % remote_name, ignore_status=True)
    finally:
        # run() can still raise (e.g. when the host is unreachable);
        # the local tunnel process must not be leaked in that case.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001229
1230
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports.

    Calls `rpc_disconnect()` for every port currently recorded in
    `self._rpc_proxy_map`.
    """
    # rpc_disconnect() removes entries from the map, so iterate over a
    # snapshot of the ports instead of the live dictionary; iterating
    # the live keys fails wherever keys() is a view rather than a list.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001235
1236
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    A ping result of 0 from `utils.ping()` counts as the host
    answering.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_result = utils.ping(self.hostname, tries=1, deadline=1)
    answered = ping_result == 0
    # XOR is set exactly when the two values disagree.
    disagree = status ^ answered
    return not disagree
1247
def _ping_wait_for_status(self, status, timeout):
    """Poll the host with `ping` until it reaches a given status.

    Polling stops as soon as the status matches, or once `timeout`
    seconds have gone by.  The pause between polls is chosen so
    that roughly _PING_WAIT_COUNT polls happen over the timeout,
    and is capped at about one minute.

    @param status Stop waiting as soon as `ping` of the host
                  reports this status (UP or DOWN).
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    # Each _ping_check_status() call takes about 1 second, which
    # is why 1 is subtracted from the computed interval.
    pause = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if self._ping_check_status(status):
            return True
        if pause > 0:
            time.sleep(pause)

    # The loop's last action was a sleep, so the most recent ping
    # may be stale; probe one final time before answering.
    return self._ping_check_status(status)
1278
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable replacement for `wait_up()`: a
    host that answers ping does not necessarily answer ssh.  Use
    this only when the target DUT can be considered functional
    even though it cannot be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001294
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster version of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001308
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The "is asleep" check cannot tell a sleeping system from one
    that is powered off; to confirm the unit really was asleep,
    force a resume and then call `test_wait_for_resume()`.

    A test is expected to call this function as part of a
    sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    limit = self.SLEEP_TIMEOUT
    if self.ping_wait_down(timeout=limit):
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' % limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001335
1336
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host entered sleep mode.  A `TestFail` exception
    is raised if the boot id has changed.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host is reachable again; a changed boot id means it
    # rebooted rather than resumed.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, current_boot_id))
1367
1368
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The "has shut down" check cannot tell a powered-off system
    from one that is merely asleep; to confirm the unit really
    was down, force a boot and then call test_wait_for_boot().

    A test is expected to call this function as part of a
    sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    limit = self.SHUTDOWN_TIMEOUT
    if self.ping_wait_down(timeout=limit):
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' % limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001394
1395
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned
    before shutting down.  A `TestFail` exception is raised if
    the boot id has not changed.  The boot id test is skipped
    when `old_boot_id` is not specified (any falsy value).

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # Only query the host's boot id when the caller supplied one
    # to compare against.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001425
1426
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    The hostname is matched against `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname The host name to test.
    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab. If it does follow the
            format, a regular expression MatchObject is returned instead.
    """
    pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001436
1437
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    Delegates to `SiteHost.check_for_rpm_support()` with this
    host's name, and returns its result unchanged.

    @return A truthy regular expression MatchObject if this host's
            name follows the defined naming format for RPM powered
            DUT's; None otherwise.
    """
    hostname = self.hostname
    return SiteHost.check_for_rpm_support(hostname)
1445
1446
def _set_power(self, state, power_method):
    """Sets the power to the host via RPM, Servo or manual.

    @param state Specifies which power state to set to DUT; must be
                 'ON' or 'OFF' (checked case-insensitively).
    @param power_method Specifies which method of power control to
                        use.  `POWER_CONTROL_SERVO` and
                        `POWER_CONTROL_MANUAL` select servo and
                        manual control; any other value selects RPM.

    @exception TestError `state` is not one of the accepted states.
    @exception TestFail RPM control was requested, but `has_power()`
                        reports no RPM for this host.

    """
    valid_states = ['ON', 'OFF']
    requested = state.upper()
    if requested not in valid_states:
        raise error.TestError('State must be one of: %s.'
                               % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; just give the operator time
        # to change the power by hand.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Everything else is handled through the RPM.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    # Record on the host's AFE entry that its outlet was changed
    # before actually asking the RPM to switch it.
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, requested)
Simran Basid5e5e272012-09-24 15:23:59 -07001477
1478
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001488
1489
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1499
1500
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is an explicit
    `power_off()`, a pause of `_POWER_CYCLE_TIMEOUT` seconds, and a
    `power_on()`.  For any other method a single 'CYCLE' request
    is sent to the RPM.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        # NOTE(review): unlike _set_power(), this RPM path neither
        # checks has_power() nor sets the _RPM_OUTLET_CHANGED host
        # attribute -- confirm that this is intentional.
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001516
1517
def get_platform(self):
    """Determine the correct platform label for this host.

    The label is derived from the firmware id (fwid) reported by
    crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # The fwid generally looks like {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; everything before
    # the first '.' is the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.', 1)[0].lower()
    # Newer platforms carry a 'Google_' prefix that older ones lack.
    return platform_id.replace('google_', '')
1531
1532
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in
    /etc/lsb-release on the host.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, but
    # development devices report {board_name}-signed-{key_type}.
    # A leading 'x86-' component is part of the board name and has
    # to be kept, so such boards retain two components, not one.
    components = board.split('-')
    kept = 2 if 'x86' in board else 1
    return board_format_string % '-'.join(components[:kept])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001549
1550
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a lightsensor.

    Searches under `_LIGHTSENSOR_SEARCH_DIR` on the host for any of
    the file names listed in `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr
        # even though the command still works correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
1570
1571
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code of 1, meaning the directory
        # did not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1587
1588
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the correct graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    # NOTE(review): `uname -a` presumably also prints the host name,
    # so a name containing 'arm' would match as well -- confirm.
    system_info = self.run('uname -a').stdout.lower()
    is_arm = 'arm' in system_info
    return 'graphics:gles' if is_arm else 'graphics:gl'
1604
1605
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in `_LABEL_FUNCTIONS` is called with this
    host, and each truthy result is collected in order.

    @returns a list of the labels produced by the label functions.
    """
    candidates = (label_function(self)
                  for label_function in self._LABEL_FUNCTIONS)
    # Label functions return a falsy value (e.g. None) when their
    # label does not apply; those are dropped.
    return [label for label in candidates if label]