blob: a860fbd7b70b4f4363044c85b768ad223dc5511e [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
J. Richard Barnette1d78b012012-05-15 13:56:30 -07006import logging
Dan Shi0f466e82013-02-22 15:44:58 -08007import os
Simran Basid5e5e272012-09-24 15:23:59 -07008import re
Christopher Wileyd78249a2013-03-01 13:05:31 -08009import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070010import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070011import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080015from autotest_lib.client.common_lib import error
16from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080018from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080019from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080020from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070022from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import site_host_attributes
Scott Zawalski89c44dd2013-02-26 09:28:02 -050024from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
25from autotest_lib.server.cros.dynamic_suite import tools
J. Richard Barnette75487572013-03-08 12:47:50 -080026from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070027from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080028from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070029
Richard Barnette82c35912012-11-20 10:09:10 -080030# Importing frontend.afe.models requires a full Autotest
31# installation (with the Django modules), not just the source
32# repository. Most developers won't have the full installation, so
33# the imports below will fail for them.
34#
35# The fix is to catch import exceptions, and set `models` to `None`
36# on failure. This has the side effect that
37# SiteHost._get_board_from_afe() will fail: That will manifest as
38# failures during Repair jobs leaving the DUT as "Repair Failed".
39# In practice, you can't test Repair jobs without a full
40# installation, so that kind of failure isn't expected.
41try:
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080042 # pylint: disable=W0611
Richard Barnette82c35912012-11-20 10:09:10 -080043 from autotest_lib.frontend import setup_django_environment
44 from autotest_lib.frontend.afe import models
45except:
46 models = None
47
Simran Basid5e5e272012-09-24 15:23:59 -070048
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080049def _make_servo_hostname(hostname):
50 host_parts = hostname.split('.')
51 host_parts[0] = host_parts[0] + '-servo'
52 return '.'.join(host_parts)
53
54
55def _get_lab_servo(target_hostname):
56 """Instantiate a Servo for |target_hostname| in the lab.
57
58 Assuming that |target_hostname| is a device in the CrOS test
59 lab, create and return a Servo object pointed at the servo
60 attached to that DUT. The servo in the test lab is assumed
61 to already have servod up and running on it.
62
63 @param target_hostname: device whose servo we want to target.
64 @return an appropriately configured Servo instance.
65 """
66 servo_host = _make_servo_hostname(target_hostname)
67 if utils.host_is_in_lab_zone(servo_host):
68 try:
J. Richard Barnetted5f807a2013-02-11 16:51:00 -080069 return servo.Servo(servo_host=servo_host)
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080070 except: # pylint: disable=W0702
71 # TODO(jrbarnette): Long-term, if we can't get to
72 # a servo in the lab, we want to fail, so we should
73 # pass any exceptions along. Short-term, we're not
74 # ready to rely on servo, so we ignore failures.
75 pass
76 return None
77
78
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070079def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
80 connect_timeout=None, alive_interval=None):
81 """Override default make_ssh_command to use options tuned for Chrome OS.
82
83 Tuning changes:
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070084 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH connection
85 failure. Consistency with remote_access.sh.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070086
Dale Curtisaa5eedb2011-08-23 16:18:52 -070087 - ServerAliveInterval=180; which causes SSH to ping connection every
88 180 seconds. In conjunction with ServerAliveCountMax ensures that if the
89 connection dies, Autotest will bail out quickly. Originally tried 60 secs,
90 but saw frequent job ABORTS where the test completed successfully.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070091
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070092 - ServerAliveCountMax=3; consistency with remote_access.sh.
93
94 - ConnectAttempts=4; reduce flakiness in connection errors; consistency
95 with remote_access.sh.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070096
97 - UserKnownHostsFile=/dev/null; we don't care about the keys. Host keys
98 change with every new installation, don't waste memory/space saving them.
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070099
100 - SSH protocol forced to 2; needed for ServerAliveInterval.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800101
102 @param user User name to use for the ssh connection.
103 @param port Port on the target host to use for ssh connection.
104 @param opts Additional options to the ssh command.
105 @param hosts_file Ignored.
106 @param connect_timeout Ignored.
107 @param alive_interval Ignored.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -0700108 """
109 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
110 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
Chris Sosaf7fcd6e2011-09-27 17:30:47 -0700111 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
112 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
113 ' -o Protocol=2 -l %s -p %d')
Dale Curtiscb7bfaf2011-06-07 16:21:57 -0700114 return base_command % (opts, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700115
116
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800117
Aviv Keshet74c89a92013-02-04 15:18:30 -0800118def add_label_detector(label_function_list, label_list=None, label=None):
119 """Decorator used to group functions together into the provided list.
120 @param label_function_list: List of label detecting functions to add
121 decorated function to.
122 @param label_list: List of detectable labels to add detectable labels to.
123 (Default: None)
124 @param label: Label string that is detectable by this detection function
125 (Default: None)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800126 """
Simran Basic6f1f7a2012-10-16 10:47:46 -0700127 def add_func(func):
Aviv Keshet74c89a92013-02-04 15:18:30 -0800128 """
129 @param func: The function to be added as a detector.
130 """
131 label_function_list.append(func)
132 if label and label_list is not None:
133 label_list.append(label)
Simran Basic6f1f7a2012-10-16 10:47:46 -0700134 return func
135 return add_func
136
137
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700138class SiteHost(remote.RemoteHost):
139 """Chromium OS specific subclass of Host."""
140
141 _parser = autoserv_parser.autoserv_parser
142
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800143 # Time to wait for new kernel to be marked successful after
144 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700145 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700146
Richard Barnette03a0c132012-11-05 12:40:35 -0800147 # Timeout values (in seconds) associated with various Chrome OS
148 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700149 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800150 # In general, a good rule of thumb is that the timeout can be up
151 # to twice the typical measured value on the slowest platform.
152 # The times here have not necessarily been empirically tested to
153 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700154 #
155 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800156 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
157 # time to restart the netwowrk.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700158 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800159 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800160 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700161 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800162 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800163 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800164 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700165 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
Richard Barnette03a0c132012-11-05 12:40:35 -0800166 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700167
168 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800169 RESUME_TIMEOUT = 10
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700170 BOOT_TIMEOUT = 45
171 USB_BOOT_TIMEOUT = 150
172 SHUTDOWN_TIMEOUT = 5
173 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
Richard Barnette03a0c132012-11-05 12:40:35 -0800174 _INSTALL_TIMEOUT = 240
175
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800176 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
177 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
178 _USB_POWER_TIMEOUT = 5
179 _POWER_CYCLE_TIMEOUT = 10
180
Richard Barnette03a0c132012-11-05 12:40:35 -0800181 _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
182 '%(board)s_test_image.bin')
183
J. Richard Barnettec14897e2013-03-06 15:56:55 -0800184 # TODO(jrbarnette): Servo repair is restricted to specific
185 # boards, because the existing servo client code doesn't account
186 # for board-specific differences in handling for 'cold_reset'.
187 # http://crosbug.com/36973
188 _SERVO_REPAIR_WHITELIST = ('x86-alex', 'lumpy')
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800189
190
Richard Barnette82c35912012-11-20 10:09:10 -0800191 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
192 'rpm_recovery_boards', type=str).split(',')
193
194 _MAX_POWER_CYCLE_ATTEMPTS = 6
195 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
196 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
197 'host[0-9]+')
198 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
199 'in_illuminance0_raw',
200 'illuminance0_input']
201 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
202 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800203 _DETECTABLE_LABELS = []
204 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
205 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700206
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800207 # Constants used in ping_wait_up() and ping_wait_down().
208 #
209 # _PING_WAIT_COUNT is the approximate number of polling
210 # cycles to use when waiting for a host state change.
211 #
212 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
213 # for arguments to the internal _ping_wait_for_status()
214 # method.
215 _PING_WAIT_COUNT = 40
216 _PING_STATUS_DOWN = False
217 _PING_STATUS_UP = True
218
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800219 # Allowed values for the power_method argument.
220
221 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
222 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
223 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
224 POWER_CONTROL_RPM = 'RPM'
225 POWER_CONTROL_SERVO = 'servoj10'
226 POWER_CONTROL_MANUAL = 'manual'
227
228 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
229 POWER_CONTROL_SERVO,
230 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800231
J. Richard Barnette964fba02012-10-24 17:34:29 -0700232 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800233 def get_servo_arguments(args_dict):
234 """Extract servo options from `args_dict` and return the result.
235
236 Take the provided dictionary of argument options and return
237 a subset that represent standard arguments needed to
238 construct a servo object for a host. The intent is to
239 provide standard argument processing from run_remote_tests
240 for tests that require a servo to operate.
241
242 Recommended usage:
243 ~~~~~~~~
244 args_dict = utils.args_to_dict(args)
245 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
246 host = hosts.create_host(machine, servo_args=servo_args)
247 ~~~~~~~~
248
249 @param args_dict Dictionary from which to extract the servo
250 arguments.
251 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700252 servo_args = {}
253 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800254 if arg in args_dict:
255 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700256 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700257
J. Richard Barnette964fba02012-10-24 17:34:29 -0700258
259 def _initialize(self, hostname, servo_args=None, *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700260 """Initialize superclasses, and |self.servo|.
261
262 For creating the host servo object, there are three
263 possibilities: First, if the host is a lab system known to
264 have a servo board, we connect to that servo unconditionally.
265 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700266 servo features for testing, it will pass settings for
267 `servo_host`, `servo_port`, or both. If neither of these
268 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700269
270 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700271 super(SiteHost, self)._initialize(hostname=hostname,
272 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700273 # self.env is a dictionary of environment variable settings
274 # to be exported for commands run on the host.
275 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
276 # errors that might happen.
277 self.env['LIBC_FATAL_STDERR_'] = '1'
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700278 self._xmlrpc_proxy_map = {}
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800279 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700280 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700281 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700282
283
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500284 def get_repair_image_name(self):
285 """Generate a image_name from variables in the global config.
286
287 @returns a str of $board-version/$BUILD.
288
289 """
290 stable_version = global_config.global_config.get_config_value(
291 'CROS', 'stable_cros_version')
292 build_pattern = global_config.global_config.get_config_value(
293 'CROS', 'stable_build_pattern')
294 board = self._get_board_from_afe()
295 if board is None:
296 raise error.AutoservError('DUT has no board attribute, '
297 'cannot be repaired.')
298 return build_pattern % (board, stable_version)
299
300
301 def clear_cros_version_labels_and_job_repo_url(self):
302 """Clear cros_version labels and host attribute job_repo_url."""
303 host_model = models.Host.objects.get(hostname=self.hostname)
304 for label in host_model.labels.iterator():
305 if not label.name.startswith(ds_constants.VERSION_PREFIX):
306 continue
Dan Shi0f466e82013-02-22 15:44:58 -0800307
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500308 label.host_set.remove(host_model)
309
310 host_model.set_or_delete_attribute('job_repo_url', None)
311
312
Dan Shi0f466e82013-02-22 15:44:58 -0800313 def _try_stateful_update(self, update_url, force_update, updater):
314 """Try to use stateful update to initialize DUT.
315
316 When DUT is already running the same version that machine_install
317 tries to install, stateful update is a much faster way to clean up
318 the DUT for testing, compared to a full reimage. It is implemeted
319 by calling autoupdater.run_update, but skipping updating root, as
320 updating the kernel is time consuming and not necessary.
321
322 @param update_url: url of the image.
323 @param force_update: Set to True to update the image even if the DUT
324 is running the same version.
325 @param updater: ChromiumOSUpdater instance used to update the DUT.
326 @returns: True if the DUT was updated with stateful update.
327
328 """
Dan Shi7b7379d2013-03-19 16:26:33 -0700329 # Stateful update is disabled until lsb-release has rc build info.
330 logging.info('Stateful update only is disabled.')
331 return False
Dan Shi0f466e82013-02-22 15:44:58 -0800332 if not updater.check_version():
333 return False
334 if not force_update:
335 logging.info('Canceling stateful update because the new and '
336 'old versions are the same.')
337 return False
338 # Following folders should be rebuilt after stateful update.
339 # A test file is used to confirm each folder gets rebuilt after
340 # the stateful update.
341 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
342 test_file = '.test_file_to_be_deleted'
343 for folder in folders_to_check:
344 touch_path = os.path.join(folder, test_file)
345 self.run('touch %s' % touch_path)
346
347 if not updater.run_update(force_update=True, update_root=False):
348 return False
349
350 # Reboot to complete stateful update.
351 self.reboot(timeout=60, wait=True)
352 check_file_cmd = 'test -f %s; echo $?'
353 for folder in folders_to_check:
354 test_file_path = os.path.join(folder, test_file)
355 result = self.run(check_file_cmd % test_file_path,
356 ignore_status=True)
357 if result.exit_status == 1:
358 return False
359 return True
360
361
362 def _post_update_processing(self, updater, inactive_kernel=None):
363 """After the DUT is updated, confirm machine_install succeeded.
364
365 @param updater: ChromiumOSUpdater instance used to update the DUT.
366 @param inactive_kernel: kernel state of inactive kernel before reboot.
367
368 """
369
370 # Touch the lab machine file to leave a marker that distinguishes
371 # this image from other test images.
372 self.run('touch %s' % self._LAB_MACHINE_FILE)
373
374 # Kick off the autoreboot script as the _LAB_MACHINE_FILE was
375 # missing on the first boot.
376 self.run('start autoreboot')
377
378 # Following the reboot, verify the correct version.
379 if not updater.check_version():
380 # Print out crossystem to make it easier to debug the rollback.
381 logging.debug('Dumping partition table.')
382 self.host.run('cgpt show $(rootdev -s -d)')
383 logging.debug('Dumping crossystem for firmware debugging.')
384 self.host.run('crossystem --all')
385 logging.error('Expected Chromium OS version: %s. '
386 'Found Chromium OS %s',
387 self.update_version, updater.get_build_id())
388 raise ChromiumOSError('Updater failed on host %s' %
389 self.host.hostname)
390
391 # Figure out newly active kernel.
392 new_active_kernel, _ = updater.get_kernel_state()
393
394 # Ensure that previously inactive kernel is now the active kernel.
395 if inactive_kernel and new_active_kernel != inactive_kernel:
396 raise autoupdater.ChromiumOSError(
397 'Update failed. New kernel partition is not active after'
398 ' boot.')
399
400 host_attributes = site_host_attributes.HostAttributes(self.hostname)
401 if host_attributes.has_chromeos_firmware:
402 # Wait until tries == 0 and success, or until timeout.
403 utils.poll_for_condition(
404 lambda: (updater.get_kernel_tries(new_active_kernel) == 0
405 and updater.get_kernel_success(new_active_kernel)),
406 exception=autoupdater.ChromiumOSError(
407 'Update failed. Timed out waiting for system to mark'
408 ' new kernel as successful.'),
409 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
410
411
Chris Sosaa3ac2152012-05-23 22:23:13 -0700412 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500413 local_devserver=False, repair=False):
414 """Install the DUT.
415
Dan Shi0f466e82013-02-22 15:44:58 -0800416 Use stateful update if the DUT is already running the same build.
417 Stateful update does not update kernel and tends to run much faster
418 than a full reimage. If the DUT is running a different build, or it
419 failed to do a stateful update, full update, including kernel update,
420 will be applied to the DUT.
421
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500422 @param update_url: The url to use for the update
423 pattern: http://$devserver:###/update/$build
424 If update_url is None and repair is True we will install the
425 stable image listed in global_config under
426 CROS.stable_cros_version.
427 @param force_update: Force an update even if the version installed
428 is the same. Default:False
429 @param local_devserver: Used by run_remote_test to allow people to
430 use their local devserver. Default: False
431 @param repair: Whether or not we are in repair mode. This adds special
432 cases for repairing a machine like starting update_engine.
433 Setting repair to True sets force_update to True as well.
434 default: False
435 @raises autoupdater.ChromiumOSError
436
437 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700438 if not update_url and self._parser.options.image:
439 update_url = self._parser.options.image
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500440 elif not update_url and not repair:
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700441 raise autoupdater.ChromiumOSError(
442 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500443 elif not update_url and repair:
444 image_name = self.get_repair_image_name()
445 devserver = dev_server.ImageServer.resolve(image_name)
446 logging.info('Staging repair build: %s', image_name)
447 devserver.trigger_download(image_name, synchronous=False)
448 self.clear_cros_version_labels_and_job_repo_url()
449 update_url = tools.image_url_pattern() % (devserver.url(),
450 image_name)
451
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500452 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800453 # In case the system is in a bad state, we always reboot the machine
454 # before machine_install.
455 self.reboot(timeout=60, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500456 self.run('stop update-engine; start update-engine')
457 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800458
Chris Sosaa3ac2152012-05-23 22:23:13 -0700459 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Dan Shi0f466e82013-02-22 15:44:58 -0800460 local_devserver=local_devserver)
461 updated = False
462 # If the DUT is already running the same build, try stateful update
463 # first. Stateful update does not update kernel and tends to run much
464 # faster than a full reimage.
465 try:
466 updated = self._try_stateful_update(update_url, force_update,
467 updater)
468 if updated:
469 logging.info('DUT is updated with stateful update.')
470 except Exception as e:
471 logging.exception(e)
472 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700473
Dan Shi0f466e82013-02-22 15:44:58 -0800474 inactive_kernel = None
475 # Do a full update if stateful update is not applicable or failed.
476 if not updated:
477 # In case the system is in a bad state, we always reboot the
478 # machine before machine_install.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700479 self.reboot(timeout=60, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800480 if updater.run_update(force_update):
481 updated = True
482 # Figure out active and inactive kernel.
483 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700484
Dan Shi0f466e82013-02-22 15:44:58 -0800485 # Ensure inactive kernel has higher priority than active.
486 if (updater.get_kernel_priority(inactive_kernel)
487 < updater.get_kernel_priority(active_kernel)):
488 raise autoupdater.ChromiumOSError(
489 'Update failed. The priority of the inactive kernel'
490 ' partition is less than that of the active kernel'
491 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700492
Dan Shi0f466e82013-02-22 15:44:58 -0800493 update_engine_log = '/var/log/update_engine.log'
494 logging.info('Dumping %s', update_engine_log)
495 self.run('cat %s' % update_engine_log)
496 # Updater has returned successfully; reboot the host.
497 self.reboot(timeout=60, wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700498
Dan Shi0f466e82013-02-22 15:44:58 -0800499 if updated:
500 self._post_update_processing(updater, inactive_kernel)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800501
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700502 # Clean up any old autotest directories which may be lying around.
503 for path in global_config.global_config.get_config_value(
504 'AUTOSERV', 'client_autodir_paths', type=list):
505 self.run('rm -rf ' + path)
506
507
Simran Basi833814b2013-01-29 13:13:43 -0800508 def _get_label_from_afe(self, label_prefix):
509 """Retrieve a host's specific label from the AFE.
510
511 Looks for a host label that has the form <label_prefix>:<value>
512 and returns the "<value>" part of the label. None is returned
513 if there is not a label matching the pattern
514
515 @returns the label that matches the prefix or 'None'
516 """
517 host_model = models.Host.objects.get(hostname=self.hostname)
518 host_label = host_model.labels.get(name__startswith=label_prefix)
519 if not host_label:
520 return None
521 return host_label.name.split(label_prefix, 1)[1]
522
523
Richard Barnette82c35912012-11-20 10:09:10 -0800524 def _get_board_from_afe(self):
525 """Retrieve this host's board from its labels in the AFE.
526
527 Looks for a host label of the form "board:<board>", and
528 returns the "<board>" part of the label. `None` is returned
529 if there is not a single, unique label matching the pattern.
530
531 @returns board from label, or `None`.
532 """
Simran Basi833814b2013-01-29 13:13:43 -0800533 return self._get_label_from_afe(ds_constants.BOARD_PREFIX)
534
535
536 def get_build(self):
537 """Retrieve the current build for this Host from the AFE.
538
539 Looks through this host's labels in the AFE to determine its build.
540
541 @returns The current build or None if it could not find it or if there
542 were multiple build labels assigned to this host.
543 """
544 return self._get_label_from_afe(ds_constants.VERSION_PREFIX)
Richard Barnette82c35912012-11-20 10:09:10 -0800545
546
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500547 def _install_repair(self):
548 """Attempt to repair this host using upate-engine.
549
550 If the host is up, try installing the DUT with a stable
551 "repair" version of Chrome OS as defined in the global_config
552 under CROS.stable_cros_version.
553
554 @returns True if successful, False if update_engine failed.
555
556 """
557 if not self.is_up():
558 return False
559
560 logging.info('Attempting to reimage machine to repair image.')
561 try:
562 self.machine_install(repair=True)
563 except autoupdater.ChromiumOSError:
564 logging.info('Repair via install failed.')
565 return False
566
567 return True
568
569
Richard Barnette03a0c132012-11-05 12:40:35 -0800570 def _servo_repair(self, board):
571 """Attempt to repair this host using an attached Servo.
572
573 Re-install the OS on the DUT by 1) installing a test image
574 on a USB storage device attached to the Servo board,
575 2) booting that image in recovery mode, and then
576 3) installing the image.
577
578 """
579 server = dev_server.ImageServer.devserver_url_for_servo(board)
580 image = server + (self._DEFAULT_SERVO_URL_FORMAT %
581 { 'board': board })
582 self.servo.install_recovery_image(image)
583 if not self.wait_up(timeout=self.USB_BOOT_TIMEOUT):
584 raise error.AutoservError('DUT failed to boot from USB'
585 ' after %d seconds' %
586 self.USB_BOOT_TIMEOUT)
587 self.run('chromeos-install --yes',
588 timeout=self._INSTALL_TIMEOUT)
589 self.servo.power_long_press()
590 self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
591 self.servo.power_short_press()
592 if not self.wait_up(timeout=self.BOOT_TIMEOUT):
593 raise error.AutoservError('DUT failed to reboot installed '
594 'test image after %d seconds' %
595 self.BOOT_TIMEOUT)
596
597
Richard Barnette82c35912012-11-20 10:09:10 -0800598 def _powercycle_to_repair(self):
599 """Utilize the RPM Infrastructure to bring the host back up.
600
601 If the host is not up/repaired after the first powercycle we utilize
602 auto fallback to the last good install by powercycling and rebooting the
603 host 6 times.
604 """
605 logging.info('Attempting repair via RPM powercycle.')
606 failed_cycles = 0
607 self.power_cycle()
608 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
609 failed_cycles += 1
610 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
611 raise error.AutoservError('Powercycled host %s %d times; '
612 'device did not come back online.' %
613 (self.hostname, failed_cycles))
614 self.power_cycle()
615 if failed_cycles == 0:
616 logging.info('Powercycling was successful first time.')
617 else:
618 logging.info('Powercycling was successful after %d failures.',
619 failed_cycles)
620
621
622 def repair_full(self):
623 """Repair a host for repair level NO_PROTECTION.
624
625 This overrides the base class function for repair; it does
626 not call back to the parent class, but instead offers a
627 simplified implementation based on the capabilities in the
628 Chrome OS test lab.
629
J. Richard Barnettefde55fc2013-03-15 17:47:01 -0700630 If `self.verify()` fails, the following procedures are
631 attempted:
632 1. Try to re-install to a known stable image using
633 auto-update.
634 2. If there's a servo for the DUT, try to re-install via
635 the servo.
636 3. If the DUT can be power-cycled via RPM, try to repair
Richard Barnette82c35912012-11-20 10:09:10 -0800637 by power-cycling.
638
639 As with the parent method, the last operation performed on
640 the DUT must be to call `self.verify()`; if that call fails,
641 the exception it raises is passed back to the caller.
J. Richard Barnettefde55fc2013-03-15 17:47:01 -0700642
Richard Barnette82c35912012-11-20 10:09:10 -0800643 """
644 try:
645 self.verify()
646 except:
647 host_board = self._get_board_from_afe()
Richard Barnette03a0c132012-11-05 12:40:35 -0800648 if host_board is None:
649 logging.error('host %s has no board; failing repair',
650 self.hostname)
Richard Barnette82c35912012-11-20 10:09:10 -0800651 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500652
J. Richard Barnettefde55fc2013-03-15 17:47:01 -0700653 if not self._install_repair():
654 # TODO(scottz): All repair pathways should be
655 # executed until we've exhausted all options. Below
656 # we favor servo over powercycle when we really
657 # should be falling back to power if servo fails.
658 if (self.servo and
659 host_board in self._SERVO_REPAIR_WHITELIST):
660 self._servo_repair(host_board)
661 elif (self.has_power() and
662 host_board in self._RPM_RECOVERY_BOARDS):
663 self._powercycle_to_repair()
664 else:
665 logging.error('host %s has no servo and no RPM control; '
666 'failing repair', self.hostname)
667 raise
Richard Barnette82c35912012-11-20 10:09:10 -0800668 self.verify()
669
670
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700671 def close(self):
672 super(SiteHost, self).close()
673 self.xmlrpc_disconnect_all()
674
675
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700676 def cleanup(self):
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700677 client_at = autotest.Autotest(self)
Richard Barnette82c35912012-11-20 10:09:10 -0800678 self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500679 try:
680 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
681 '_clear_login_prompt_state')
682 self.run('restart ui')
683 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
684 '_wait_for_login_prompt')
Alex Millerf4517962013-02-25 15:03:02 -0800685 except (error.AutotestRunError, error.AutoservRunError):
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500686 logging.warn('Unable to restart ui, rebooting device.')
687 # Since restarting the UI fails fall back to normal Autotest
688 # cleanup routines, i.e. reboot the machine.
689 super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700690
691
Simran Basi154f5582012-10-23 16:27:11 -0700692 # TODO (sbasi) crosbug.com/35656
693 # Renamed the sitehost cleanup method so we don't go down this pathway.
694 # def cleanup(self):
695 def cleanup_poweron(self):
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700696 """Special cleanup method to make sure hosts always get power back."""
Chris Sosa9479fcd2012-10-09 13:44:22 -0700697 super(SiteHost, self).cleanup()
Simran Basid5e5e272012-09-24 15:23:59 -0700698 if self.has_power():
Simran Basifd23fb22012-10-22 17:56:22 -0700699 try:
700 self.power_on()
Chris Sosafab08082013-01-04 15:21:20 -0800701 except rpm_client.RemotePowerException:
Simran Basifd23fb22012-10-22 17:56:22 -0700702 # If cleanup has completed but there was an issue with the RPM
703 # Infrastructure, log an error message rather than fail cleanup
704 logging.error('Failed to turn Power On for this host after '
705 'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700706
707
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700708 def reboot(self, **dargs):
709 """
710 This function reboots the site host. The more generic
711 RemoteHost.reboot() performs sync and sleeps for 5
712 seconds. This is not necessary for Chrome OS devices as the
713 sync should be finished in a short time during the reboot
714 command.
715 """
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800716 if 'reboot_cmd' not in dargs:
717 dargs['reboot_cmd'] = ('((reboot & sleep 10; reboot -f &)'
718 ' </dev/null >/dev/null 2>&1 &)')
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700719 # Enable fastsync to avoid running extra sync commands before reboot.
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800720 if 'fastsync' not in dargs:
721 dargs['fastsync'] = True
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700722 super(SiteHost, self).reboot(**dargs)
723
724
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700725 def verify_software(self):
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800726 """Verify working software on a Chrome OS system.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700727
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800728 Tests for the following conditions:
729 1. All conditions tested by the parent version of this
730 function.
731 2. Sufficient space in /mnt/stateful_partition.
732 3. update_engine answers a simple status request over DBus.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700733
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700734 """
735 super(SiteHost, self).verify_software()
736 self.check_diskspace(
737 '/mnt/stateful_partition',
738 global_config.global_config.get_config_value(
739 'SERVER', 'gb_diskspace_required', type=int,
740 default=20))
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800741 self.run('update_engine_client --status')
Scott Zawalskifbca4a92013-03-04 15:56:42 -0500742 # Makes sure python is present, loads and can use built in functions.
743 # We have seen cases where importing cPickle fails with undefined
744 # symbols in cPickle.so.
745 self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700746
747
Christopher Wileyd78249a2013-03-01 13:05:31 -0800748 def xmlrpc_connect(self, command, port, command_name=None,
749 ready_test_name=None, timeout_seconds=10):
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700750 """Connect to an XMLRPC server on the host.
751
752 The `command` argument should be a simple shell command that
753 starts an XMLRPC server on the given `port`. The command
754 must not daemonize, and must terminate cleanly on SIGTERM.
755 The command is started in the background on the host, and a
756 local XMLRPC client for the server is created and returned
757 to the caller.
758
759 Note that the process of creating an XMLRPC client makes no
760 attempt to connect to the remote server; the caller is
761 responsible for determining whether the server is running
762 correctly, and is ready to serve requests.
763
Christopher Wileyd78249a2013-03-01 13:05:31 -0800764 Optionally, the caller can pass ready_test_name, a string
765 containing the name of a method to call on the proxy. This
766 method should take no parameters and return successfully only
767 when the server is ready to process client requests. When
768 ready_test_name is set, xmlrpc_connect will block until the
769 proxy is ready, and throw a TestError if the server isn't
770 ready by timeout_seconds.
771
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700772 @param command Shell command to start the server.
773 @param port Port number on which the server is expected to
774 be serving.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800775 @param command_name String to use as input to `pkill` to
776 terminate the XMLRPC server on the host.
Christopher Wileyd78249a2013-03-01 13:05:31 -0800777 @param ready_test_name String containing the name of a
778 method defined on the XMLRPC server.
779 @param timeout_seconds Number of seconds to wait
780 for the server to become 'ready.' Will throw a
781 TestFail error if server is not ready in time.
782
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700783 """
784 self.xmlrpc_disconnect(port)
785
786 # Chrome OS on the target closes down most external ports
787 # for security. We could open the port, but doing that
788 # would conflict with security tests that check that only
789 # expected ports are open. So, to get to the port on the
790 # target we use an ssh tunnel.
791 local_port = utils.get_unused_port()
792 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
793 ssh_cmd = make_ssh_command(opts=tunnel_options)
794 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
795 logging.debug('Full tunnel command: %s', tunnel_cmd)
796 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
797 logging.debug('Started XMLRPC tunnel, local = %d'
798 ' remote = %d, pid = %d',
799 local_port, port, tunnel_proc.pid)
800
801 # Start the server on the host. Redirection in the command
802 # below is necessary, because 'ssh' won't terminate until
803 # background child processes close stdin, stdout, and
804 # stderr.
805 remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
806 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
807 logging.debug('Started XMLRPC server on host %s, pid = %s',
808 self.hostname, remote_pid)
809
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800810 self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700811 rpc_url = 'http://localhost:%d' % local_port
Christopher Wileyd78249a2013-03-01 13:05:31 -0800812 proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
813 if ready_test_name is not None:
J. Richard Barnette13eb7c02013-03-07 12:06:29 -0800814 # retry.retry logs each attempt; calculate delay_sec to
815 # keep log spam to a dull roar.
Christopher Wileyd78249a2013-03-01 13:05:31 -0800816 @retry.retry((socket.error, xmlrpclib.ProtocolError),
817 timeout_min=timeout_seconds/60.0,
J. Richard Barnette13eb7c02013-03-07 12:06:29 -0800818 delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
Christopher Wileyd78249a2013-03-01 13:05:31 -0800819 def ready_test():
820 """ Call proxy.ready_test_name(). """
821 getattr(proxy, ready_test_name)()
822 successful = False
823 try:
824 logging.info('Waiting %d seconds for XMLRPC server '
825 'to start.', timeout_seconds)
826 ready_test()
827 successful = True
828 except retry.TimeoutException:
829 raise error.TestError('Unable to start XMLRPC server after '
830 '%d seconds.' % timeout_seconds)
831 finally:
832 if not successful:
833 logging.error('Failed to start XMLRPC server.')
834 self.xmlrpc_disconnect(port)
835 logging.info('XMLRPC server started successfully.')
836 return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700837
838 def xmlrpc_disconnect(self, port):
839 """Disconnect from an XMLRPC server on the host.
840
841 Terminates the remote XMLRPC server previously started for
842 the given `port`. Also closes the local ssh tunnel created
843 for the connection to the host. This function does not
844 directly alter the state of a previously returned XMLRPC
845 client object; however disconnection will cause all
846 subsequent calls to methods on the object to fail.
847
848 This function does nothing if requested to disconnect a port
849 that was not previously connected via `self.xmlrpc_connect()`
850
851 @param port Port number passed to a previous call to
852 `xmlrpc_connect()`
853 """
854 if port not in self._xmlrpc_proxy_map:
855 return
856 entry = self._xmlrpc_proxy_map[port]
857 remote_name = entry[0]
858 tunnel_proc = entry[1]
859 if remote_name:
860 # We use 'pkill' to find our target process rather than
861 # a PID, because the host may have rebooted since
862 # connecting, and we don't want to kill an innocent
863 # process with the same PID.
864 #
865 # 'pkill' helpfully exits with status 1 if no target
866 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -0700867 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700868 # status.
869 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
870
871 if tunnel_proc.poll() is None:
872 tunnel_proc.terminate()
873 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
874 else:
875 logging.debug('Tunnel pid %d terminated early, status %d',
876 tunnel_proc.pid, tunnel_proc.returncode)
877 del self._xmlrpc_proxy_map[port]
878
879
880 def xmlrpc_disconnect_all(self):
881 """Disconnect all known XMLRPC proxy ports."""
882 for port in self._xmlrpc_proxy_map.keys():
883 self.xmlrpc_disconnect(port)
884
885
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800886 def _ping_check_status(self, status):
887 """Ping the host once, and return whether it has a given status.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700888
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800889 @param status Check the ping status against this value.
890 @return True iff `status` and the result of ping are the same
891 (i.e. both True or both False).
892
893 """
894 ping_val = utils.ping(self.hostname, tries=1, deadline=1)
895 return not (status ^ (ping_val == 0))
896
897 def _ping_wait_for_status(self, status, timeout):
898 """Wait for the host to have a given status (UP or DOWN).
899
900 Status is checked by polling. Polling will not last longer
901 than the number of seconds in `timeout`. The polling
902 interval will be long enough that only approximately
903 _PING_WAIT_COUNT polling cycles will be executed, subject
904 to a maximum interval of about one minute.
905
906 @param status Waiting will stop immediately if `ping` of the
907 host returns this status.
908 @param timeout Poll for at most this many seconds.
909 @return True iff the host status from `ping` matched the
910 requested status at the time of return.
911
912 """
913 # _ping_check_status() takes about 1 second, hence the
914 # "- 1" in the formula below.
915 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
916 end_time = time.time() + timeout
917 while time.time() <= end_time:
918 if self._ping_check_status(status):
919 return True
920 if poll_interval > 0:
921 time.sleep(poll_interval)
922
923 # The last thing we did was sleep(poll_interval), so it may
924 # have been too long since the last `ping`. Check one more
925 # time, just to be sure.
926 return self._ping_check_status(status)
927
928 def ping_wait_up(self, timeout):
929 """Wait for the host to respond to `ping`.
930
931 N.B. This method is not a reliable substitute for
932 `wait_up()`, because a host that responds to ping will not
933 necessarily respond to ssh. This method should only be used
934 if the target DUT can be considered functional even if it
935 can't be reached via ssh.
936
937 @param timeout Minimum time to allow before declaring the
938 host to be non-responsive.
939 @return True iff the host answered to ping before the timeout.
940
941 """
942 return self._ping_wait_for_status(self._PING_STATUS_UP, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700943
Andrew Bresticker678c0c72013-01-22 10:44:09 -0800944 def ping_wait_down(self, timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700945 """Wait until the host no longer responds to `ping`.
946
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800947 This function can be used as a slightly faster version of
948 `wait_down()`, by avoiding potentially long ssh timeouts.
949
950 @param timeout Minimum time to allow for the host to become
951 non-responsive.
952 @return True iff the host quit answering ping before the
953 timeout.
954
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700955 """
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800956 return self._ping_wait_for_status(self._PING_STATUS_DOWN, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700957
958 def test_wait_for_sleep(self):
959 """Wait for the client to enter low-power sleep mode.
960
961 The test for "is asleep" can't distinguish a system that is
962 powered off; to confirm that the unit was asleep, it is
963 necessary to force resume, and then call
964 `test_wait_for_resume()`.
965
966 This function is expected to be called from a test as part
967 of a sequence like the following:
968
969 ~~~~~~~~
970 boot_id = host.get_boot_id()
971 # trigger sleep on the host
972 host.test_wait_for_sleep()
973 # trigger resume on the host
974 host.test_wait_for_resume(boot_id)
975 ~~~~~~~~
976
977 @exception TestFail The host did not go to sleep within
978 the allowed time.
979 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -0800980 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700981 raise error.TestFail(
982 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700983 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700984
985
986 def test_wait_for_resume(self, old_boot_id):
987 """Wait for the client to resume from low-power sleep mode.
988
989 The `old_boot_id` parameter should be the value from
990 `get_boot_id()` obtained prior to entering sleep mode. A
991 `TestFail` exception is raised if the boot id changes.
992
993 See @ref test_wait_for_sleep for more on this function's
994 usage.
995
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800996 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700997 target host went to sleep.
998
999 @exception TestFail The host did not respond within the
1000 allowed time.
1001 @exception TestFail The host responded, but the boot id test
1002 indicated a reboot rather than a sleep
1003 cycle.
1004 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001005 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001006 raise error.TestFail(
1007 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001008 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001009 else:
1010 new_boot_id = self.get_boot_id()
1011 if new_boot_id != old_boot_id:
1012 raise error.TestFail(
1013 'client rebooted, but sleep was expected'
1014 ' (old boot %s, new boot %s)'
1015 % (old_boot_id, new_boot_id))
1016
1017
1018 def test_wait_for_shutdown(self):
1019 """Wait for the client to shut down.
1020
1021 The test for "has shut down" can't distinguish a system that
1022 is merely asleep; to confirm that the unit was down, it is
1023 necessary to force boot, and then call test_wait_for_boot().
1024
1025 This function is expected to be called from a test as part
1026 of a sequence like the following:
1027
1028 ~~~~~~~~
1029 boot_id = host.get_boot_id()
1030 # trigger shutdown on the host
1031 host.test_wait_for_shutdown()
1032 # trigger boot on the host
1033 host.test_wait_for_boot(boot_id)
1034 ~~~~~~~~
1035
1036 @exception TestFail The host did not shut down within the
1037 allowed time.
1038 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001039 if not self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001040 raise error.TestFail(
1041 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001042 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001043
1044
1045 def test_wait_for_boot(self, old_boot_id=None):
1046 """Wait for the client to boot from cold power.
1047
1048 The `old_boot_id` parameter should be the value from
1049 `get_boot_id()` obtained prior to shutting down. A
1050 `TestFail` exception is raised if the boot id does not
1051 change. The boot id test is omitted if `old_boot_id` is not
1052 specified.
1053
1054 See @ref test_wait_for_shutdown for more on this function's
1055 usage.
1056
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001057 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001058 shut down.
1059
1060 @exception TestFail The host did not respond within the
1061 allowed time.
1062 @exception TestFail The host responded, but the boot id test
1063 indicated that there was no reboot.
1064 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001065 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001066 raise error.TestFail(
1067 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001068 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001069 elif old_boot_id:
1070 if self.get_boot_id() == old_boot_id:
1071 raise error.TestFail(
1072 'client is back up, but did not reboot'
1073 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001074
1075
1076 @staticmethod
1077 def check_for_rpm_support(hostname):
1078 """For a given hostname, return whether or not it is powered by an RPM.
1079
1080 @return None if this host does not follows the defined naming format
1081 for RPM powered DUT's in the lab. If it does follow the format,
1082 it returns a regular expression MatchObject instead.
1083 """
Richard Barnette82c35912012-11-20 10:09:10 -08001084 return re.match(SiteHost._RPM_HOSTNAME_REGEX, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001085
1086
1087 def has_power(self):
1088 """For this host, return whether or not it is powered by an RPM.
1089
1090 @return True if this host is in the CROS lab and follows the defined
1091 naming format.
1092 """
1093 return SiteHost.check_for_rpm_support(self.hostname)
1094
1095
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001096 def _set_power(self, state, power_method):
1097 """Sets the power to the host via RPM, Servo or manual.
1098
1099 @param state Specifies which power state to set to DUT
1100 @param power_method Specifies which method of power control to
1101 use. By default "RPM" will be used. Valid values
1102 are the strings "RPM", "manual", "servoj10".
1103
1104 """
1105 ACCEPTABLE_STATES = ['ON', 'OFF']
1106
1107 if state.upper() not in ACCEPTABLE_STATES:
1108 raise error.TestError('State must be one of: %s.'
1109 % (ACCEPTABLE_STATES,))
1110
1111 if power_method == self.POWER_CONTROL_SERVO:
1112 logging.info('Setting servo port J10 to %s', state)
1113 self.servo.set('prtctl3_pwren', state.lower())
1114 time.sleep(self._USB_POWER_TIMEOUT)
1115 elif power_method == self.POWER_CONTROL_MANUAL:
1116 logging.info('You have %d seconds to set the AC power to %s.',
1117 self._POWER_CYCLE_TIMEOUT, state)
1118 time.sleep(self._POWER_CYCLE_TIMEOUT)
1119 else:
1120 if not self.has_power():
1121 raise error.TestFail('DUT does not have RPM connected.')
1122 rpm_client.set_power(self.hostname, state.upper())
Simran Basid5e5e272012-09-24 15:23:59 -07001123
1124
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001125 def power_off(self, power_method=POWER_CONTROL_RPM):
1126 """Turn off power to this host via RPM, Servo or manual.
1127
1128 @param power_method Specifies which method of power control to
1129 use. By default "RPM" will be used. Valid values
1130 are the strings "RPM", "manual", "servoj10".
1131
1132 """
1133 self._set_power('OFF', power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001134
1135
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001136 def power_on(self, power_method=POWER_CONTROL_RPM):
1137 """Turn on power to this host via RPM, Servo or manual.
1138
1139 @param power_method Specifies which method of power control to
1140 use. By default "RPM" will be used. Valid values
1141 are the strings "RPM", "manual", "servoj10".
1142
1143 """
1144 self._set_power('ON', power_method)
1145
1146
1147 def power_cycle(self, power_method=POWER_CONTROL_RPM):
1148 """Cycle power to this host by turning it OFF, then ON.
1149
1150 @param power_method Specifies which method of power control to
1151 use. By default "RPM" will be used. Valid values
1152 are the strings "RPM", "manual", "servoj10".
1153
1154 """
1155 if power_method in (self.POWER_CONTROL_SERVO,
1156 self.POWER_CONTROL_MANUAL):
1157 self.power_off(power_method=power_method)
1158 time.sleep(self._POWER_CYCLE_TIMEOUT)
1159 self.power_on(power_method=power_method)
1160 else:
1161 rpm_client.set_power(self.hostname, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -07001162
1163
1164 def get_platform(self):
1165 """Determine the correct platform label for this host.
1166
1167 @returns a string representing this host's platform.
1168 """
1169 crossystem = utils.Crossystem(self)
1170 crossystem.init()
1171 # Extract fwid value and use the leading part as the platform id.
1172 # fwid generally follow the format of {platform}.{firmware version}
1173 # Example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
1174 platform = crossystem.fwid().split('.')[0].lower()
1175 # Newer platforms start with 'Google_' while the older ones do not.
1176 return platform.replace('google_', '')
1177
1178
Aviv Keshet74c89a92013-02-04 15:18:30 -08001179 @label_decorator()
Simran Basic6f1f7a2012-10-16 10:47:46 -07001180 def get_board(self):
1181 """Determine the correct board label for this host.
1182
1183 @returns a string representing this host's board.
1184 """
1185 release_info = utils.parse_cmd_output('cat /etc/lsb-release',
1186 run_method=self.run)
1187 board = release_info['CHROMEOS_RELEASE_BOARD']
1188 # Devices in the lab generally have the correct board name but our own
1189 # development devices have {board_name}-signed-{key_type}. The board
1190 # name may also begin with 'x86-' which we need to keep.
Simran Basi833814b2013-01-29 13:13:43 -08001191 board_format_string = ds_constants.BOARD_PREFIX + '%s'
Simran Basic6f1f7a2012-10-16 10:47:46 -07001192 if 'x86' not in board:
Simran Basi833814b2013-01-29 13:13:43 -08001193 return board_format_string % board.split('-')[0]
1194 return board_format_string % '-'.join(board.split('-')[0:2])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001195
1196
Aviv Keshet74c89a92013-02-04 15:18:30 -08001197 @label_decorator('lightsensor')
Simran Basic6f1f7a2012-10-16 10:47:46 -07001198 def has_lightsensor(self):
1199 """Determine the correct board label for this host.
1200
1201 @returns the string 'lightsensor' if this host has a lightsensor or
1202 None if it does not.
1203 """
1204 search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
Richard Barnette82c35912012-11-20 10:09:10 -08001205 self._LIGHTSENSOR_SEARCH_DIR, '|'.join(self._LIGHTSENSOR_FILES))
Simran Basic6f1f7a2012-10-16 10:47:46 -07001206 try:
1207 # Run the search cmd following the symlinks. Stderr_tee is set to
1208 # None as there can be a symlink loop, but the command will still
1209 # execute correctly with a few messages printed to stderr.
1210 self.run(search_cmd, stdout_tee=None, stderr_tee=None)
1211 return 'lightsensor'
1212 except error.AutoservRunError:
1213 # egrep exited with a return code of 1 meaning none of the possible
1214 # lightsensor files existed.
1215 return None
1216
1217
Aviv Keshet74c89a92013-02-04 15:18:30 -08001218 @label_decorator('bluetooth')
Simran Basic6f1f7a2012-10-16 10:47:46 -07001219 def has_bluetooth(self):
1220 """Determine the correct board label for this host.
1221
1222 @returns the string 'bluetooth' if this host has bluetooth or
1223 None if it does not.
1224 """
1225 try:
1226 self.run('test -d /sys/class/bluetooth/hci0')
1227 # test exited with a return code of 0.
1228 return 'bluetooth'
1229 except error.AutoservRunError:
1230 # test exited with a return code 1 meaning the directory did not
1231 # exist.
1232 return None
1233
1234
1235 def get_labels(self):
1236 """Return a list of labels for this given host.
1237
1238 This is the main way to retrieve all the automatic labels for a host
1239 as it will run through all the currently implemented label functions.
1240 """
1241 labels = []
Richard Barnette82c35912012-11-20 10:09:10 -08001242 for label_function in self._LABEL_FUNCTIONS:
Simran Basic6f1f7a2012-10-16 10:47:46 -07001243 label = label_function(self)
1244 if label:
1245 labels.append(label)
1246 return labels