blob: 111987221f372313e536bea3b5e6718221f49cf6 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080028from autotest_lib.server.cros.servo import servo
Fang Deng96667ca2013-08-01 17:46:18 -070029from autotest_lib.server.hosts import abstract_ssh
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
# NOTE(review): presumably extra options for the ssh command line; the
# consumer is not in this part of the file -- confirm before changing.
GLOBAL_SSH_COMMAND_OPTIONS = ''

# jsonrpclib is an optional dependency.  The name stays bound to None
# when the module cannot be imported, so callers can test for it.
jsonrpclib = None
try:
    import jsonrpclib
except ImportError:
    pass
Fang Deng96667ca2013-08-01 17:46:18 -070040
def _make_servo_hostname(hostname):
    """Derive the servo host name that goes with a DUT host name.

    '-servo' is appended to the first dot-separated component of
    `hostname`; everything from the first dot onward is kept as is.

    @param hostname: host name of the DUT.
    @return the corresponding servo host name.
    """
    first_label, dot, remainder = hostname.partition('.')
    return first_label + '-servo' + dot + remainder
45
46
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    Failure to create the Servo object is not fatal: the error is
    logged at debug level and `None` is returned.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None` if
            the servo host is not in the lab zone or the Servo could
            not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Only Exception is caught, so that KeyboardInterrupt and
        # SystemExit still propagate, and the failure is recorded
        # rather than dropped without a trace.
        logging.debug('Unable to create Servo for %s; continuing '
                      'without servo.', servo_host, exc_info=True)
        return None
69
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080070
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a label detection function.

    @param label_function_list: List that each decorated function is
                                appended to.
    @param label_list: List that `label` is appended to when a function
                       is decorated.  (Default: None)
    @param label: Label string detectable by the decorated function.
                  (Default: None)
    @return a decorator that records the function and returns it
            unchanged.
    """
    def register(detector):
        """
        @param detector: The function to be added as a detector.
        @return `detector` itself.
        """
        label_function_list.append(detector)
        # The label is recorded only when there is both a label and a
        # list to hold it.
        if not label or label_list is None:
            return detector
        label_list.append(label)
        return detector
    return register
89
90
Fang Deng96667ca2013-08-01 17:46:18 -070091class SiteHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070092 """Chromium OS specific subclass of Host."""
93
94 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050095 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070096
Richard Barnette0c73ffc2012-11-19 15:21:18 -080097 # Time to wait for new kernel to be marked successful after
98 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -070099 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700100
Richard Barnette03a0c132012-11-05 12:40:35 -0800101 # Timeout values (in seconds) associated with various Chrome OS
102 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700103 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800104 # In general, a good rule of thumb is that the timeout can be up
105 # to twice the typical measured value on the slowest platform.
106 # The times here have not necessarily been empirically tested to
107 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700108 #
109 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800110 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
111 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700112 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800113 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800114 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700115 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800116 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800117 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800118 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700119 # REBOOT_TIMEOUT: How long to wait for a reboot.
Richard Barnette03a0c132012-11-05 12:40:35 -0800120 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700121
122 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800123 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700124 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700125 USB_BOOT_TIMEOUT = 150
Chris Sosab76e0ee2013-05-22 16:55:41 -0700126
127 # We have a long timeout to ensure we don't flakily fail due to other
128 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
129 REBOOT_TIMEOUT = 300
130
Richard Barnette03a0c132012-11-05 12:40:35 -0800131 _INSTALL_TIMEOUT = 240
132
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800133 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
134 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
135 _USB_POWER_TIMEOUT = 5
136 _POWER_CYCLE_TIMEOUT = 10
137
beeps32a63082013-08-22 14:02:29 -0700138 _RPC_PROXY_URL = 'http://localhost:%d'
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800139
Richard Barnette82c35912012-11-20 10:09:10 -0800140 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
141 'rpm_recovery_boards', type=str).split(',')
142
143 _MAX_POWER_CYCLE_ATTEMPTS = 6
144 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
145 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
146 'host[0-9]+')
147 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
148 'in_illuminance0_raw',
149 'illuminance0_input']
150 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
151 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800152 _DETECTABLE_LABELS = []
153 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
154 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700155
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800156 # Constants used in ping_wait_up() and ping_wait_down().
157 #
158 # _PING_WAIT_COUNT is the approximate number of polling
159 # cycles to use when waiting for a host state change.
160 #
161 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
162 # for arguments to the internal _ping_wait_for_status()
163 # method.
164 _PING_WAIT_COUNT = 40
165 _PING_STATUS_DOWN = False
166 _PING_STATUS_UP = True
167
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800168 # Allowed values for the power_method argument.
169
170 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
171 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
172 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
173 POWER_CONTROL_RPM = 'RPM'
174 POWER_CONTROL_SERVO = 'servoj10'
175 POWER_CONTROL_MANUAL = 'manual'
176
177 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
178 POWER_CONTROL_SERVO,
179 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800180
Simran Basi5e6339a2013-03-21 11:34:32 -0700181 _RPM_OUTLET_CHANGED = 'outlet_changed'
182
beeps687243d2013-07-18 15:29:27 -0700183
@staticmethod
def get_servo_arguments(args_dict):
    """Pick the servo-related options out of `args_dict`.

    Only the keys 'servo_host' and 'servo_port' are copied, and only
    when they are present in `args_dict`.  The result is meant to be
    passed on as the `servo_args` used to construct a servo object
    for a host, giving tests that need a servo a standard way to
    process arguments coming from run_remote_tests.

    Recommended usage:
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)

    @param args_dict: Dictionary from which to extract the servo
                      arguments.
    @return a new dictionary holding the servo arguments found.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700209
J. Richard Barnette964fba02012-10-24 17:34:29 -0700210
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    `self.servo` is chosen as follows: the lab servo for `hostname`
    is tried first, via `_get_lab_servo()`.  If that yields nothing
    and the caller supplied `servo_args` (settings such as
    `servo_host` and/or `servo_port`), a Servo is constructed from
    those arguments.  Otherwise `self.servo` is `None`.

    @param hostname: name of the host being initialized.
    @param servo_args: keyword arguments for constructing a Servo,
                       or `None`.
    """
    super(SiteHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env holds environment variable settings exported for
    # commands run on the host.  LIBC_FATAL_STDERR_ can be useful
    # for diagnosing certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._rpc_proxy_map = {}
    self.servo = _get_lab_servo(hostname)
    if self.servo or servo_args is None:
        return
    self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700234
235
def get_repair_image_name(self):
    """Build the repair image name from the global config.

    The CROS settings `stable_cros_version` and
    `stable_build_pattern` are read, and the pattern is filled in
    with this host's board (as reported by the AFE) and the stable
    version.

    @returns the formatted image name string.
    @raises error.AutoservError if the host has no board.

    """
    config = global_config.global_config
    stable_version = config.get_config_value(
            'CROS', 'stable_cros_version')
    build_pattern = config.get_config_value(
            'CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
251
252
def _host_in_AFE(self):
    """Ask the AFE about this host.

    @returns the result of the AFE `get_hosts` query for this host
             name (presumably a list of matching host objects that
             is empty when the AFE does not know the host -- confirm
             against the AFE client).
    """
    afe = self._AFE
    return afe.get_hosts(hostname=self.hostname)
259
260
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    The AFE is queried once, through `_host_in_AFE()`, and the
    attributes of the first host it returns are inspected.

    @returns job_repo_url from the AFE, or None if the AFE does not
             know the host or the host has no job_repo_url
             attribute.
    """
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700274
275
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise
    the host is removed from every label whose name starts with the
    version prefix, and job_repo_url is reset through
    `update_job_repo_url(None, None)`.
    """
    if not self._host_in_AFE():
        return

    hostnames = [self.hostname]
    version_labels = self._AFE.get_labels(
            name__startswith=ds_constants.VERSION_PREFIX,
            host__hostname=self.hostname)
    for version_label in version_labels:
        version_label.remove_hosts(hosts=hostnames)

    self.update_job_repo_url(None, None)
290
291
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    When either argument is empty or None, the attribute is set to
    None.  After the update the attribute is read back and compared
    with the value that was written.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    stored_url = self.lookup_job_repo_url()
    if stored_url != repo_url:
        raise error.AutoservError('Failed to update job_repo_url with %s, '
                                  'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500309
310
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise
    the version label for `image_name` is looked up (and created if
    it does not exist), this host is added to it, and job_repo_url
    is updated to point at the devserver resolved for the image.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=cros_label)
    label = existing[0] if existing else self._AFE.create_label(
            name=cros_label)

    label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
332
333
def verify_job_repo_url(self, tag=''):
    """
    Make sure the autotest package named by job_repo_url is staged.

    Eg: The job_repo_url
    "http://lmn.cd.ab.xyx:8080/static/lumpy-release/R29-4279.0.0/
    autotest/packages" claims to have the autotest package for
    lumpy-release/R29-4279.0.0.  The devserver embedded in the url is
    asked to stage the 'autotest' artifacts for that build, and the
    time this takes is reported as a 'verify_job_repo_url' gauge
    keyed by board, build type, branch and devserver.

    If the host has no job_repo_url, a warning is logged and nothing
    else happens.  If the image name cannot be parsed into
    board/build type/branch, staging still happens but no gauge is
    sent.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Currently not used by this method.

    NOTE(review): errors raised by the devserver client while staging
    are not handled here and propagate to the caller (presumably
    dev_server.DevServerException or urllib2.URLError -- confirm).
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
        job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    start_time = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - start_time

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = site_utils.ParseBuildName(
            image_name)[:3]
    except site_utils.ParseBuildNameException:
        # Without board/build type/branch there is no stats key to
        # report under.
        return

    # Reduce e.g. 'http://host:8080/static' to 'host'.  Dropping the
    # scheme and path first keeps this correct when the url has no
    # explicit port or has a ':' somewhere in its path.
    host_and_port = devserver_url.split('//', 1)[-1].split('/', 1)[0]
    devserver = host_and_port.rsplit(':', 1)[0]
    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        # Dots would otherwise split the host into separate stats
        # path components.
        'devserver': devserver.replace('.', '_'),
    }
    stats.Gauge('verify_job_repo_url').send(
        '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
        stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700395
Scott Zawalskieadbf702013-03-14 09:23:06 -0400396
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    A marker file is created in each folder that the stateful update
    is expected to rebuild.  After the update and reboot, every marker
    must be gone; a marker that survives means the folder was not
    rebuilt, and the stateful update is reported as failed.

    @param update_url: url of the image.  Not used by this method.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    # NOTE(review): check_version() is taken to report whether the
    # DUT already runs the target version, per the log message below
    # -- confirm against autoupdater.
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False

    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    # `test -f` is run on its own so that the command's exit status
    # is the result of the test: 0 means the marker still exists.
    # (Appending `; echo $?` would make the exit status always that
    # of `echo`, i.e. 0, and the check could never fail.)
    for marker_path in marker_paths:
        result = self.run('test -f %s' % marker_path,
                          ignore_status=True)
        if result.exit_status == 0:
            logging.info('Stateful update did not remove %s.',
                         marker_path)
            return False
    return True
441
442
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    Three checks are made, in order: that the system did not roll
    back to another kernel (only when `expected_kernel` is given),
    that the installed build is the one requested, and that the
    active kernel gets marked successful within
    _KERNEL_UPDATE_TIMEOUT seconds.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.
    @raises autoupdater.ChromiumOSError if any of the checks fails.

    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # Figure out the newly active kernel.
    active_kernel, _ = updater.get_kernel_state()

    # Check for rollback due to a bad build.
    if expected_kernel and active_kernel != expected_kernel:
        # Print out some information to make it easier to debug
        # the rollback.
        logging.debug('Dumping partition table.')
        self.run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self.run('crossystem --all')
        rollback_message = ('Build %s failed to boot on %s; system '
                            'rolled back to previous build')
        raise autoupdater.ChromiumOSError(
                rollback_message % (updater.update_version,
                                    self.hostname))

    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        wrong_build_message = ('Failed to update %s to build %s; '
                               'found build %s instead')
        raise autoupdater.ChromiumOSError(
                wrong_build_message % (self.hostname,
                                       updater.update_version,
                                       updater.get_build_id()))

    def kernel_marked_good():
        """Tell whether the active kernel has been marked successful."""
        return (updater.get_kernel_tries(active_kernel) == 0
                and updater.get_kernel_success(active_kernel))

    # Make sure chromeos-setgoodkernel runs.
    try:
        utils.poll_for_condition(
                kernel_marked_good,
                exception=autoupdater.ChromiumOSError(),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except autoupdater.ChromiumOSError:
        # Use the state of system-services to tell the two failure
        # causes apart.
        services_status = self.run('status system-services').stdout
        if services_status == 'system-services start/running\n':
            event = ('update-engine failed to call '
                     'chromeos-setgoodkernel')
        else:
            event = 'Chrome failed to reach login screen'
        raise autoupdater.ChromiumOSError(
                'After update and reboot, %s '
                'within %d seconds' % (event,
                                       self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800500
501
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    When no image name is given, the repair image name is used.  The
    download on the devserver is triggered with synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    image_name = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    devserver.trigger_download(image_name, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (devserver.url(), image_name)
515
516
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    When no image name is given, the repair image name is used.  The
    'test_image' artifact is staged on the devserver resolved for the
    image.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns the URL the devserver reports for the test image of
             `image_name`.
    """
    image_name = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    devserver.stage_artifacts(image_name, ['test_image'])
    test_image_url = devserver.get_test_image_url(image_name)
    return test_image_url
530
531
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory image on a devserver and return its URL.

    The 'factory_image' artifact is staged from the canary channel
    server on the devserver resolved for the image.

    @param image_name: a name like <board>/4262.204.0
    @return: the factory image URL, eg:
        http://<devserver>/static/canary-channel/<board>/4262.204.0/
        factory_test/chromiumos_factory_image.bin
        (shown wrapped here), or None if no image name was given.
    """
    if image_name:
        logging.info('Staging build for servo install: %s', image_name)
        devserver = dev_server.ImageServer.resolve(image_name)
        canary_server = dev_server._get_canary_channel_server()
        devserver.stage_artifacts(
                image_name,
                ['factory_image'],
                archive_url=canary_server)
        url_pattern = tools.factory_image_url_pattern()
        return url_pattern % (devserver.url(), image_name)

    logging.error('Need an image_name to stage a factory image.')
    return None
552
553
Chris Sosaa3ac2152012-05-23 22:23:13 -0700554 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500555 local_devserver=False, repair=False):
556 """Install the DUT.
557
Dan Shi0f466e82013-02-22 15:44:58 -0800558 Use stateful update if the DUT is already running the same build.
559 Stateful update does not update kernel and tends to run much faster
560 than a full reimage. If the DUT is running a different build, or it
561 failed to do a stateful update, full update, including kernel update,
562 will be applied to the DUT.
563
Scott Zawalskieadbf702013-03-14 09:23:06 -0400564 Once a host enters machine_install its cros_version label will be
565 removed as well as its host attribute job_repo_url (used for
566 package install).
567
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500568 @param update_url: The url to use for the update
569 pattern: http://$devserver:###/update/$build
570 If update_url is None and repair is True we will install the
571 stable image listed in global_config under
572 CROS.stable_cros_version.
573 @param force_update: Force an update even if the version installed
574 is the same. Default:False
575 @param local_devserver: Used by run_remote_test to allow people to
576 use their local devserver. Default: False
577 @param repair: Whether or not we are in repair mode. This adds special
578 cases for repairing a machine like starting update_engine.
579 Setting repair to True sets force_update to True as well.
580 default: False
581 @raises autoupdater.ChromiumOSError
582
583 """
Dan Shi7458bf62013-06-10 12:50:16 -0700584 if update_url:
585 logging.debug('update url is set to %s', update_url)
586 else:
587 logging.debug('update url is not set, resolving...')
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700588 if self._parser.options.image:
589 requested_build = self._parser.options.image
590 if requested_build.startswith('http://'):
591 update_url = requested_build
Dan Shi7458bf62013-06-10 12:50:16 -0700592 logging.debug('update url is retrieved from requested_build'
593 ': %s', update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700594 else:
595 # Try to stage any build that does not start with
596 # http:// on the devservers defined in
597 # global_config.ini.
Dan Shi7458bf62013-06-10 12:50:16 -0700598 update_url = self._stage_image_for_update(requested_build)
599 logging.debug('Build staged, and update_url is set to: %s',
600 update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700601 elif repair:
602 update_url = self._stage_image_for_update()
Dan Shi7458bf62013-06-10 12:50:16 -0700603 logging.debug('Build staged, and update_url is set to: %s',
604 update_url)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400605 else:
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700606 raise autoupdater.ChromiumOSError(
607 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500608
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500609 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800610 # In case the system is in a bad state, we always reboot the machine
611 # before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700612 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500613 self.run('stop update-engine; start update-engine')
614 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800615
Chris Sosaa3ac2152012-05-23 22:23:13 -0700616 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Chris Sosa72312602013-04-16 15:01:56 -0700617 local_devserver=local_devserver)
Dan Shi0f466e82013-02-22 15:44:58 -0800618 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400619 # Remove cros-version and job_repo_url host attribute from host.
620 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800621 # If the DUT is already running the same build, try stateful update
622 # first. Stateful update does not update kernel and tends to run much
623 # faster than a full reimage.
624 try:
Chris Sosab76e0ee2013-05-22 16:55:41 -0700625 updated = self._try_stateful_update(
626 update_url, force_update, updater)
Dan Shi0f466e82013-02-22 15:44:58 -0800627 if updated:
628 logging.info('DUT is updated with stateful update.')
629 except Exception as e:
630 logging.exception(e)
631 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700632
Dan Shi0f466e82013-02-22 15:44:58 -0800633 inactive_kernel = None
634 # Do a full update if stateful update is not applicable or failed.
635 if not updated:
636 # In case the system is in a bad state, we always reboot the
637 # machine before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700638 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Chris Sosab7612bc2013-03-21 10:32:37 -0700639
640 # TODO(sosa): Remove temporary hack to get rid of bricked machines
641 # that can't update due to a corrupted policy.
642 self.run('rm -rf /var/lib/whitelist')
643 self.run('touch /var/lib/whitelist')
644 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400645 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700646
Dan Shi0f466e82013-02-22 15:44:58 -0800647 if updater.run_update(force_update):
648 updated = True
649 # Figure out active and inactive kernel.
650 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700651
Dan Shi0f466e82013-02-22 15:44:58 -0800652 # Ensure inactive kernel has higher priority than active.
653 if (updater.get_kernel_priority(inactive_kernel)
654 < updater.get_kernel_priority(active_kernel)):
655 raise autoupdater.ChromiumOSError(
656 'Update failed. The priority of the inactive kernel'
657 ' partition is less than that of the active kernel'
658 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700659
Dan Shi0f466e82013-02-22 15:44:58 -0800660 # Updater has returned successfully; reboot the host.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700661 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700662
Dan Shi0f466e82013-02-22 15:44:58 -0800663 if updated:
664 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400665 image_name = autoupdater.url_to_image_name(update_url)
Dan Shie9309262013-06-19 22:50:21 -0700666 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800667
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700668 # Clean up any old autotest directories which may be lying around.
669 for path in global_config.global_config.get_config_value(
670 'AUTOSERV', 'client_autodir_paths', type=list):
671 self.run('rm -rf ' + path)
672
673
def show_update_engine_log(self):
    """Dump the update engine log of the host.

    Logs the path being dumped at debug level, then runs `cat` on
    `constants.UPDATE_ENGINE_LOG` through `self.run()`.
    """
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
678
679
def _get_board_from_afe(self):
    """Look up this host's board using its AFE labels.

    Delegates to `server_utils.get_board_from_afe()` with this host's
    hostname and AFE handle.  Per the original contract, the label has
    the form "board:<board>" and `None` is returned unless exactly one
    such label exists.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800690
691
def get_build(self):
    """Look up the build currently assigned to this host in the AFE.

    Delegates to `server_utils.get_build_from_afe()` with this host's
    hostname and AFE handle.

    @returns The current build, or None if it could not be found or if
             multiple build labels were assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800701
702
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    When the host is up, reimage it by calling
    `self.machine_install(repair=True)`.  Per the original contract the
    repair image is the stable version defined in the global_config
    under CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the traceback, then let the caller see the failure.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500724
725
def servo_install(self, image_url=None):
    """Re-install the OS on the DUT through the attached Servo.

    The sequence is:
      1) install a test image on the USB storage device attached to
         the Servo board,
      2) boot that image in recovery mode, and then
      3) install the image with chromeos-install and power the DUT
         back on from its internal storage.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.

    @raises AutoservRepairFailure if the DUT does not boot from USB
            within USB_BOOT_TIMEOUT seconds.
    @raises AutoservError if the installed image does not boot within
            BOOT_TIMEOUT seconds.
    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        usb_failure = ('DUT failed to boot from USB after %d seconds' %
                       self.USB_BOOT_TIMEOUT)
        raise error.AutoservRepairFailure(usb_failure)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()
    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        raise error.AutoservError(
                'DUT failed to reboot installed test image after %d seconds'
                % self.BOOT_TIMEOUT)
755
756
def _servo_repair_reinstall(self):
    """Reinstall the DUT using servo and a freshly staged test image.

    Stages an image with `self.stage_image_for_servo()` and hands the
    resulting URL to `self.servo_install()`, which:
      1) installs a test image on a USB storage device attached to the
         Servo board,
      2) boots that image in recovery mode, and then
      3) installs the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    if not self.servo:
        raise error.AutoservRepairMethodNA(
                'Repair Reinstall NA: DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_image_url = self.stage_image_for_servo()
    self.servo_install(staged_image_url)
779
780
def _servo_repair_power(self):
    """Attempt to repair the DUT by power cycling it through Servo.

    Uses the servo power state controller to power the DUT off and
    then on again, and waits up to BOOT_TIMEOUT seconds for the DUT
    to come back up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA(
                'Repair Power NA: DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()
    came_back = self.wait_up(self.BOOT_TIMEOUT)
    if not came_back:
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800802
803
def _powercycle_to_repair(self):
    """Use the RPM Infrastructure to bring the host back up.

    The host is power cycled and then given BOOT_TIMEOUT seconds to come
    up.  Each time it fails to come up it is power cycled again, until
    _MAX_POWER_CYCLE_ATTEMPTS failures have been counted.  Per the
    original note, repeated cycling relies on the device's automatic
    fallback to its last good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host never comes back online.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
834
835
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted, in order, until one of
    them completes and a subsequent `self.verify()` passes:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If there's a servo for the DUT, try to re-install via
         the servo.
      4. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Any exception raised by a procedure, or by the `self.verify()`
    call that follows it, is logged and counted as a failed attempt;
    the next procedure is then tried.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.  The message lists every collected failure.
    """
    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_steps = (self._install_repair,
                    self._servo_repair_power,
                    self._servo_repair_reinstall,
                    self._powercycle_to_repair)
    failure_messages = []
    for attempt_repair in repair_steps:
        try:
            attempt_repair()
            self.verify()
        except Exception as repair_error:
            logging.warning('Failed to repair device: %s', repair_error)
            failure_messages.append(str(repair_error))
        else:
            # Repair step and verification both succeeded.
            return

    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failure_messages))
Richard Barnette82c35912012-11-20 10:09:10 -0800881
882
def close(self):
    """Close the host.

    Disconnects every known RPC proxy port via
    `rpc_disconnect_all()` before delegating to the parent class
    `close()`.
    """
    self.rpc_disconnect_all()
    parent = super(SiteHost, self)
    parent.close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700886
887
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Looks this host up in the AFE and does nothing unless the host
    carries the `_RPM_OUTLET_CHANGED` attribute.  Otherwise the host is
    powered on and the attribute is reset to None.  A
    `RemotePowerException` from `power_on()` is logged rather than
    raised, and the attribute is still reset afterwards.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
906
907
def cleanup(self):
    """Clean up the host after a job.

    Removes the file named by `constants.CLEANUP_LOGS_PAUSED_FILE`,
    then tries to restart the UI and waits for the login prompt.  If
    that fails with an Autotest or Autoserv run error, falls back to the
    parent class cleanup.  Finally, when the host has remote power
    control, makes sure power is restored via `_cleanup_poweron()`.
    """
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    rpm_controlled = self.has_power()
    if rpm_controlled:
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700925
926
def reboot(self, **dargs):
    """Reboot the site host with Chrome OS specific defaults.

    The more generic RemoteHost.reboot() performs sync and sleeps for
    5 seconds.  This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot command.

    Unless the caller supplies them, `reboot_cmd` and `fastsync` are
    filled in before the arguments are forwarded to the parent class.

    @param dargs Keyword arguments forwarded to the parent `reboot()`.
    """
    default_reboot_cmd = ('((reboot & sleep 10; reboot -f &)'
                          ' </dev/null >/dev/null 2>&1 &)')
    dargs.setdefault('reboot_cmd', default_reboot_cmd)
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
942
943
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. update_engine answers a simple status request over DBus.
     5. python on the DUT can import cPickle.

    The required disk space for each partition comes from the SERVER
    section of the global config, with defaults of 20.0 and 0.1 GB.

    """
    super(SiteHost, self).verify_software()

    # (mount point, SERVER config key, default requirement in GB)
    disk_requirements = (
        ('/mnt/stateful_partition',
         'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for mount_point, config_key, default_gb in disk_requirements:
        required_gb = global_config.global_config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(mount_point, required_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700971
972
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line with options tuned for Chrome OS.

    Overrides the default make_ssh_command.  Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command string, without the target hostname.
    """
    tuned_options = ' '.join([
        '-o StrictHostKeyChecking=no',
        '-o UserKnownHostsFile=/dev/null',
        '-o BatchMode=yes',
        '-o ConnectTimeout=30',
        '-o ServerAliveInterval=180',
        '-o ServerAliveCountMax=3',
        '-o ConnectionAttempts=4',
        '-o Protocol=2',
    ])
    return '/usr/bin/ssh -a -x %s %s %s -l %s -p %d' % (
            GLOBAL_SSH_COMMAND_OPTIONS, opts, tuned_options, user, port)
1011
1012
def _create_ssh_tunnel(self, port, local_port):
    """Start an ssh process that forwards local_port to port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process (a `subprocess.Popen` object).
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding_opts),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel.pid)
    return tunnel
1035
1036
def _setup_rpc(self, port, command_name):
    """Set up a tunnel process and record it for later disconnection.

    Any connection previously registered for `port` is disconnected
    first, so at most one tunnel per remote port is tracked in
    `self._rpc_proxy_map`.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    only one kind of server can listen on a given remote port.  If a
    jsonrpc proxy is forwarded to port B and an xmlrpc proxy is later
    forwarded to the same port, the xmlrpc proxy replaces the jsonrpc
    tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
       the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
       server, as the only use case currently is goofy, which is tied to
       the factory image. It is much easier to handle a failed xmlrpc
       call on the client than it is to terminate goofy in this scenario,
       as doing the latter might leave the DUT in a hard to recover state.

    Methods invoked on a client whose tunnel was replaced this way
    fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.

    @return A url that we can use to initiate the rpc connection.
    """
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    self._rpc_proxy_map[port] = (command_name, tunnel)
    return self._RPC_PROXY_URL % forwarded_port
1072
1073
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Start an XMLRPC server on the host and return a client for it.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Creating the client does not by itself contact the remote
    server.  Unless `ready_test_name` is given, the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    When `ready_test_name` is given, it names a no-argument method on
    the proxy that returns successfully only once the server is ready.
    That method is then retried for up to `timeout_seconds`; if it
    never succeeds the connection is torn down and the error from the
    retry helper propagates.

    Any connection previously set up for `port` is disconnected first
    (see `_setup_rpc` / `rpc_disconnect`).

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'

    @return The `xmlrpclib.ServerProxy` for the server.
    """
    rpc_url = self._setup_rpc(port, command_name)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    launch_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        server_pid = self.run(launch_cmd).stdout.rstrip('\n')
    except Exception:
        # Tear down the tunnel created by _setup_rpc before propagating.
        self.rpc_disconnect(port)
        raise

    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, server_pid)

    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is None:
        return proxy

    transient_errors = (socket.error,
                        xmlrpclib.ProtocolError,
                        httplib.BadStatusLine)

    # retry.retry logs each attempt; calculate delay_sec to
    # keep log spam to a dull roar.
    @retry.retry(transient_errors,
                 timeout_min=timeout_seconds/60.0,
                 delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
    def ready_test():
        """ Call proxy.ready_test_name(). """
        getattr(proxy, ready_test_name)()

    server_ready = False
    try:
        logging.info('Waiting %d seconds for XMLRPC server '
                     'to start.', timeout_seconds)
        ready_test()
        server_ready = True
    finally:
        if not server_ready:
            logging.error('Failed to start XMLRPC server.')
            self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001154
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001155
def jsonrpc_connect(self, port):
    """Create a jsonrpc proxy connection through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, this method does not start a remote jsonrpc server,
    as the only clients of this code are factory tests, for which the
    goofy system manager is built in to the image and starts when the
    target boots.

    Any older tunnel to the same remote port is disconnected first (via
    `_setup_rpc`), so that an existing tunnel process is not leaked.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    make_proxy = jsonrpclib.jsonrpc.ServerProxy
    proxy = make_proxy(self._setup_rpc(port, None))

    logging.info('Established a jsonrpc connection through port %s.', port)
    return proxy
1186
1187
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port` (when a process name was registered for it).
    Also closes the local ssh tunnel created for the connection to
    the host.  This function does not directly alter the state of a
    previously returned RPC client object; however disconnection
    will cause all subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    """
    if port not in self._rpc_proxy_map:
        return

    remote_name, tunnel = self._rpc_proxy_map[port]
    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    still_running = tunnel.poll() is None
    if still_running:
        tunnel.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel.pid, tunnel.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001228
1229
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports.

    Calls `rpc_disconnect()` for every port currently registered in
    `self._rpc_proxy_map`.
    """
    # rpc_disconnect() deletes entries from the map, so iterate over an
    # explicit snapshot of the ports.  Relying on keys() returning a
    # copy only works on Python 2; a dict view would raise RuntimeError
    # when the map shrinks mid-iteration.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001234
1235
def _ping_check_status(self, status):
    """Ping the host once, and report whether it has a given status.

    A single ping with a one second deadline is sent; the host counts
    as answering when `utils.ping()` returns 0.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_exit_code = utils.ping(self.hostname, tries=1, deadline=1)
    host_answered = ping_exit_code == 0
    # XOR is true when the two values differ, so negate it.
    return not (status ^ host_answered)
1246
def _ping_wait_for_status(self, status, timeout):
    """Poll with `ping` until the host has a given status (UP or DOWN).

    Polling will not last longer than the number of seconds in
    `timeout` (plus one final check).  The polling interval is chosen
    so that only approximately _PING_WAIT_COUNT polling cycles are
    executed, subject to a maximum interval of about one minute.

    @param status Waiting will stop immediately if `ping` of the
                  host returns this status.
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    # _ping_check_status() takes about 1 second, hence the
    # "- 1" in the formula below.
    cycle_seconds = min(int(timeout / self._PING_WAIT_COUNT), 60)
    pause = cycle_seconds - 1
    deadline = time.time() + timeout
    while time.time() <= deadline:
        if self._ping_check_status(status):
            return True
        if pause > 0:
            time.sleep(pause)

    # The last thing we did was sleep(pause), so it may
    # have been too long since the last `ping`.  Check one more
    # time, just to be sure.
    return self._ping_check_status(status)
1277
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable replacement for `wait_up()`:
    a host that answers ping does not necessarily answer ssh.
    Use it only when the target DUT can be considered functional
    even though it cannot be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001293
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster version of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001307
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, so
    it cannot be told apart from a system that is powered off.  To
    confirm that the unit really was asleep, force a resume and
    then call `test_wait_for_resume()`.

    A test is expected to call this function as part of a
    sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    if self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001334
1335
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host entered sleep mode.  A `TestFail` exception
    is raised if the boot id has changed once the host is back.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host is reachable again; a different boot id means it
    # went through a reboot instead of a sleep/resume cycle.
    new_boot_id = self.get_boot_id()
    if new_boot_id != old_boot_id:
        raise error.TestFail(
            'client rebooted, but sleep was expected'
            ' (old boot %s, new boot %s)'
                % (old_boot_id, new_boot_id))
1366
1367
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    "Has shut down" is detected by the host no longer answering
    ping, so it cannot be told apart from a system that is merely
    asleep.  To confirm that the unit really was down, force a
    boot and then call test_wait_for_boot().

    A test is expected to call this function as part of a
    sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    if self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001393
1394
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host was shut down.  A `TestFail` exception is
    raised if the boot id is unchanged once the host is back.
    The boot id test is skipped when `old_boot_id` is not
    specified (or is otherwise false).

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
            'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # Without a reference boot id there is nothing to compare.
    if not old_boot_id:
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
            'client is back up, but did not reboot'
            ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001424
1425
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname follows the RPM naming format.

    @param hostname The host name to test against
                    `SiteHost._RPM_HOSTNAME_REGEX`.
    @return None if the hostname does not follow the defined naming
            format for RPM powered DUTs in the lab.  If it does follow
            the format, a regular expression MatchObject is returned
            instead.
    """
    rpm_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001435
1436
def has_power(self):
    """Report whether this host is powered by an RPM.

    The decision is based only on this host's name, via
    `SiteHost.check_for_rpm_support()`.

    @return A regular expression MatchObject (which is true in a
            boolean context) if this host's name follows the naming
            format defined for RPM powered DUTs in the lab, or None
            if it does not.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
1444
1445
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001446 def _set_power(self, state, power_method):
1447 """Sets the power to the host via RPM, Servo or manual.
1448
1449 @param state Specifies which power state to set to DUT
1450 @param power_method Specifies which method of power control to
1451 use. By default "RPM" will be used. Valid values
1452 are the strings "RPM", "manual", "servoj10".
1453
1454 """
1455 ACCEPTABLE_STATES = ['ON', 'OFF']
1456
1457 if state.upper() not in ACCEPTABLE_STATES:
1458 raise error.TestError('State must be one of: %s.'
1459 % (ACCEPTABLE_STATES,))
1460
1461 if power_method == self.POWER_CONTROL_SERVO:
1462 logging.info('Setting servo port J10 to %s', state)
1463 self.servo.set('prtctl3_pwren', state.lower())
1464 time.sleep(self._USB_POWER_TIMEOUT)
1465 elif power_method == self.POWER_CONTROL_MANUAL:
1466 logging.info('You have %d seconds to set the AC power to %s.',
1467 self._POWER_CYCLE_TIMEOUT, state)
1468 time.sleep(self._POWER_CYCLE_TIMEOUT)
1469 else:
1470 if not self.has_power():
1471 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001472 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1473 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1474 hostname=self.hostname)
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001475 rpm_client.set_power(self.hostname, state.upper())
Simran Basid5e5e272012-09-24 15:23:59 -07001476
1477
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='OFF', power_method=power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001487
1488
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='ON', power_method=power_method)
1498
1499
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is done as an explicit
    power off, a pause of _POWER_CYCLE_TIMEOUT seconds, and a power
    on.  For any other method a single 'CYCLE' request is sent to
    the RPM.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001515
1516
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id (fwid) reported
    by crossystem on the host.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z, so the text
    # before the first '.' is used as the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.', 1)[0]
    lowered = platform_id.lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return lowered.replace('google_', '')
1530
1531
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in
    /etc/lsb-release on the host.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}. The board
    # name may also begin with 'x86-' which we need to keep.
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Keep two dash-separated fields for 'x86' boards, one otherwise.
    fields_to_keep = 2 if 'x86' in board else 1
    board_name = '-'.join(board.split('-')[:fields_to_keep])
    return board_format_string % board_name
Simran Basic6f1f7a2012-10-16 10:47:46 -07001548
1549
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Searches _LIGHTSENSOR_SEARCH_DIR on the host for any of the
    file names listed in _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is set to None
        # because there can be a symlink loop; the command still
        # executes correctly, with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
1569
1570
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did
        # not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1586
1587
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the correct graphics label for this host.

    @returns a string representing this host's graphics. For now, hosts
    whose `uname -a` output contains 'arm' return graphics:gles while all
    others return graphics:gl. This may change over time, but for
    robustness reasons this should avoid executing code in actual graphics
    libraries (which may not be ready and is tested by
    graphics_GLAPICheck).
    """
    uname_output = self.run('uname -a').stdout
    is_arm = 'arm' in uname_output.lower()
    return 'graphics:gles' if is_arm else 'graphics:gl'
1603
1604
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in _LABEL_FUNCTIONS is called with this host,
    in order, and each true (non-empty) result is collected.

    @returns a list of the labels the label functions produced.
    """
    candidates = [label_function(self)
                  for label_function in self._LABEL_FUNCTIONS]
    return [label for label in candidates if label]