blob: 758e5c8d3f05be51e3d07f7f68c043001449cf59 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
J. Richard Barnette1d78b012012-05-15 13:56:30 -07006import logging
Dan Shi0f466e82013-02-22 15:44:58 -08007import os
Simran Basid5e5e272012-09-24 15:23:59 -07008import re
Christopher Wileyd78249a2013-03-01 13:05:31 -08009import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070010import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070011import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080015from autotest_lib.client.common_lib import error
16from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080018from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080019from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080020from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070022from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import site_host_attributes
Scott Zawalski89c44dd2013-02-26 09:28:02 -050024from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070025from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080026from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070027from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080028from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070029
Richard Barnette82c35912012-11-20 10:09:10 -080030# Importing frontend.afe.models requires a full Autotest
31# installation (with the Django modules), not just the source
32# repository. Most developers won't have the full installation, so
33# the imports below will fail for them.
34#
35# The fix is to catch import exceptions, and set `models` to `None`
36# on failure. This has the side effect that
37# SiteHost._get_board_from_afe() will fail: That will manifest as
38# failures during Repair jobs leaving the DUT as "Repair Failed".
39# In practice, you can't test Repair jobs without a full
40# installation, so that kind of failure isn't expected.
41try:
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080042 # pylint: disable=W0611
Richard Barnette82c35912012-11-20 10:09:10 -080043 from autotest_lib.frontend import setup_django_environment
44 from autotest_lib.frontend.afe import models
45except:
46 models = None
47
Simran Basid5e5e272012-09-24 15:23:59 -070048
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080049def _make_servo_hostname(hostname):
50 host_parts = hostname.split('.')
51 host_parts[0] = host_parts[0] + '-servo'
52 return '.'.join(host_parts)
53
54
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    Failure to create the Servo object is logged and otherwise
    ignored; in that case `None` is returned.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None`
            if the servo host is not in the lab zone or the Servo
            object could not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Only `Exception` is caught, so KeyboardInterrupt and
        # SystemExit still propagate; the failure is logged so
        # that a missing servo is visible in the job logs.
        logging.warning('Unable to create Servo for %s; continuing '
                        'without servo.', servo_host, exc_info=True)
        return None
77
78
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to talk to Chrome OS devices.

    This overrides the default make_ssh_command with a fixed set of
    options tuned for Chrome OS:
      - ConnectTimeout=30; at most 30 seconds are allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; SSH pings the connection every 180
        seconds.  In conjunction with ServerAliveCountMax this ensures
        that if the connection dies, Autotest will bail out quickly.
        60 seconds was tried originally, but led to frequent job
        ABORTS where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
        Host keys change with every new installation, don't waste
        memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return the ssh command line (without the target host) as a string.
    """
    ssh_template = ' '.join((
            '/usr/bin/ssh -a -x %s',
            '-o StrictHostKeyChecking=no',
            '-o UserKnownHostsFile=/dev/null',
            '-o BatchMode=yes',
            '-o ConnectTimeout=30',
            '-o ServerAliveInterval=180',
            '-o ServerAliveCountMax=3',
            '-o ConnectionAttempts=4',
            '-o Protocol=2',
            '-l %s -p %d',
    ))
    return ssh_template % (opts, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700115
116
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800117
def add_label_detector(label_function_list, label_list=None, label=None):
    """Decorator factory that registers label detecting functions.

    The decorated function is appended to |label_function_list| and is
    returned unchanged.  If |label| is truthy and |label_list| is not
    None, |label| is appended to |label_list| as well.

    @param label_function_list: List of label detecting functions to add
                                the decorated function to.
    @param label_list: List of detectable labels to add |label| to.
                       (Default: None)
    @param label: Label string that is detectable by the decorated
                  detection function. (Default: None)
    @return a decorator performing the registration described above.
    """
    record_label = bool(label) and label_list is not None

    def register_detector(func):
        """
        @param func: The function to be added as a detector.
        @return |func|, unmodified.
        """
        label_function_list.append(func)
        if record_label:
            label_list.append(label)
        return func

    return register_detector
136
137
class SiteHost(remote.RemoteHost):
    """Chromium OS specific subclass of Host."""

    # Shared autoserv command line parser; machine_install() reads
    # `options.image` from it.
    _parser = autoserv_parser.autoserv_parser

    # Time to wait for new kernel to be marked successful after
    # auto update.
    _KERNEL_UPDATE_TIMEOUT = 120

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
    # _INSTALL_TIMEOUT: Time to allow for chromeos-install.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    BOOT_TIMEOUT = 45
    USB_BOOT_TIMEOUT = 150
    SHUTDOWN_TIMEOUT = 5
    REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
    _INSTALL_TIMEOUT = 240

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # Path, relative to the devserver URL, of the per-board test image
    # that _servo_repair() installs via servo.
    _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
                                 '%(board)s_test_image.bin')

    # TODO(jrbarnette):  Servo repair is restricted to specific
    # boards, because the existing servo client code doesn't account
    # for board-specific differences in handling for 'cold_reset'.
    # http://crosbug.com/36973
    _SERVO_REPAIR_WHITELIST = ('x86-alex', 'lumpy')


    # List of board names obtained by splitting the comma-separated
    # `rpm_recovery_boards` setting in the CROS section of the global
    # config.  Evaluated once, when the class body is executed.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    # Number of failed power cycles after which
    # _powercycle_to_repair() gives up.
    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched on the DUT after an update to distinguish
    # the image from other test images.
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                           'host[0-9]+')
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # `label_decorator` is add_label_detector() with its first two
    # arguments pre-bound to the lists below, so functions decorated
    # with it are collected in _LABEL_FUNCTIONS and their labels (if
    # given) in _DETECTABLE_LABELS.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    # NOTE(review): presumably the name of the host attribute that
    # records a changed RPM outlet -- its use is not visible in this
    # part of the file; confirm against the code that reads it.
    _RPM_OUTLET_CHANGED = 'outlet_changed'
233
J. Richard Barnette964fba02012-10-24 17:34:29 -0700234 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800235 def get_servo_arguments(args_dict):
236 """Extract servo options from `args_dict` and return the result.
237
238 Take the provided dictionary of argument options and return
239 a subset that represent standard arguments needed to
240 construct a servo object for a host. The intent is to
241 provide standard argument processing from run_remote_tests
242 for tests that require a servo to operate.
243
244 Recommended usage:
245 ~~~~~~~~
246 args_dict = utils.args_to_dict(args)
247 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
248 host = hosts.create_host(machine, servo_args=servo_args)
249 ~~~~~~~~
250
251 @param args_dict Dictionary from which to extract the servo
252 arguments.
253 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700254 servo_args = {}
255 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800256 if arg in args_dict:
257 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700258 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700259
J. Richard Barnette964fba02012-10-24 17:34:29 -0700260
261 def _initialize(self, hostname, servo_args=None, *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700262 """Initialize superclasses, and |self.servo|.
263
264 For creating the host servo object, there are three
265 possibilities: First, if the host is a lab system known to
266 have a servo board, we connect to that servo unconditionally.
267 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700268 servo features for testing, it will pass settings for
269 `servo_host`, `servo_port`, or both. If neither of these
270 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700271
272 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700273 super(SiteHost, self)._initialize(hostname=hostname,
274 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700275 # self.env is a dictionary of environment variable settings
276 # to be exported for commands run on the host.
277 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
278 # errors that might happen.
279 self.env['LIBC_FATAL_STDERR_'] = '1'
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700280 self._xmlrpc_proxy_map = {}
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800281 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700282 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700283 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700284
285
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500286 def get_repair_image_name(self):
287 """Generate a image_name from variables in the global config.
288
289 @returns a str of $board-version/$BUILD.
290
291 """
292 stable_version = global_config.global_config.get_config_value(
293 'CROS', 'stable_cros_version')
294 build_pattern = global_config.global_config.get_config_value(
295 'CROS', 'stable_build_pattern')
296 board = self._get_board_from_afe()
297 if board is None:
298 raise error.AutoservError('DUT has no board attribute, '
299 'cannot be repaired.')
300 return build_pattern % (board, stable_version)
301
302
303 def clear_cros_version_labels_and_job_repo_url(self):
304 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalskieadbf702013-03-14 09:23:06 -0400305 try:
306 host_model = models.Host.objects.get(hostname=self.hostname)
307 except models.Host.DoesNotExist:
308 return
309
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500310 for label in host_model.labels.iterator():
311 if not label.name.startswith(ds_constants.VERSION_PREFIX):
312 continue
Dan Shi0f466e82013-02-22 15:44:58 -0800313
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500314 label.host_set.remove(host_model)
315
316 host_model.set_or_delete_attribute('job_repo_url', None)
317
318
Scott Zawalskieadbf702013-03-14 09:23:06 -0400319 def add_cros_version_labels_and_job_repo_url(self, image_name):
320 """Add cros_version labels and host attribute job_repo_url.
321
322 @param image_name: The name of the image e.g.
323 lumpy-release/R27-3837.0.0
324 """
325 try:
326 host_model = models.Host.objects.get(hostname=self.hostname)
327 except models.Host.DoesNotExist:
328 return
329 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
330 devserver_url = dev_server.ImageServer.resolve(image_name).url()
331 try:
332 label_model = models.Label.objects.get(name=cros_label)
333 except models.Label.DoesNotExist:
334 label_model = models.Label.objects.create(name=cros_label)
335 host_model.labels.add(label_model)
336 repo_url = tools.get_package_url(devserver_url, image_name)
337 host_model.set_or_delete_attribute('job_repo_url', repo_url)
338
339
    def _try_stateful_update(self, update_url, force_update, updater):
        """Try to use stateful update to initialize DUT.

        When DUT is already running the same version that machine_install
        tries to install, stateful update is a much faster way to clean up
        the DUT for testing, compared to a full reimage. It is implemented
        by calling autoupdater.run_update, but skipping updating root, as
        updating the kernel is time consuming and not necessary.

        NOTE: Stateful update is currently disabled.  The method logs a
        message and returns False immediately; all code after that first
        `return False` is unreachable and kept only for when the feature
        is re-enabled.

        @param update_url: url of the image.  Not used by this method.
        @param force_update: Set to True to update the image even if the DUT
            is running the same version.
        @param updater: ChromiumOSUpdater instance used to update the DUT.
        @returns: True if the DUT was updated with stateful update.

        """
        # Stateful update is disabled until lsb-release has rc build info.
        logging.info('Stateful update only is disabled.')
        return False
        # ---- Everything below is unreachable while the feature is off. ----
        if not updater.check_version():
            return False
        if not force_update:
            logging.info('Canceling stateful update because the new and '
                         'old versions are the same.')
            return False
        # Following folders should be rebuilt after stateful update.
        # A test file is used to confirm each folder gets rebuilt after
        # the stateful update.
        folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
        test_file = '.test_file_to_be_deleted'
        for folder in folders_to_check:
            touch_path = os.path.join(folder, test_file)
            self.run('touch %s' % touch_path)

        if not updater.run_update(force_update=True, update_root=False):
            return False

        # Reboot to complete stateful update.
        self.reboot(timeout=60, wait=True)
        # NOTE(review): the command ends with `echo $?`, so the exit
        # status seen by run() is presumably that of `echo`, not of
        # `test -f` -- confirm the check below before re-enabling.
        check_file_cmd = 'test -f %s; echo $?'
        for folder in folders_to_check:
            test_file_path = os.path.join(folder, test_file)
            result = self.run(check_file_cmd % test_file_path,
                              ignore_status=True)
            if result.exit_status == 1:
                return False
        return True
387
388
389 def _post_update_processing(self, updater, inactive_kernel=None):
390 """After the DUT is updated, confirm machine_install succeeded.
391
392 @param updater: ChromiumOSUpdater instance used to update the DUT.
393 @param inactive_kernel: kernel state of inactive kernel before reboot.
394
395 """
396
397 # Touch the lab machine file to leave a marker that distinguishes
398 # this image from other test images.
399 self.run('touch %s' % self._LAB_MACHINE_FILE)
400
401 # Kick off the autoreboot script as the _LAB_MACHINE_FILE was
402 # missing on the first boot.
403 self.run('start autoreboot')
404
405 # Following the reboot, verify the correct version.
406 if not updater.check_version():
407 # Print out crossystem to make it easier to debug the rollback.
408 logging.debug('Dumping partition table.')
Dan Shi346725f2013-03-20 15:22:38 -0700409 self.run('cgpt show $(rootdev -s -d)')
Dan Shi0f466e82013-02-22 15:44:58 -0800410 logging.debug('Dumping crossystem for firmware debugging.')
Dan Shi346725f2013-03-20 15:22:38 -0700411 self.run('crossystem --all')
Dan Shi0f466e82013-02-22 15:44:58 -0800412 logging.error('Expected Chromium OS version: %s. '
413 'Found Chromium OS %s',
Dan Shi346725f2013-03-20 15:22:38 -0700414 updater.update_version, updater.get_build_id())
415 raise autoupdater.ChromiumOSError('Updater failed on host %s' %
416 self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800417
418 # Figure out newly active kernel.
419 new_active_kernel, _ = updater.get_kernel_state()
420
421 # Ensure that previously inactive kernel is now the active kernel.
422 if inactive_kernel and new_active_kernel != inactive_kernel:
423 raise autoupdater.ChromiumOSError(
424 'Update failed. New kernel partition is not active after'
425 ' boot.')
426
Scott Zawalskieadbf702013-03-14 09:23:06 -0400427 try:
428 host_attributes = site_host_attributes.HostAttributes(self.hostname)
429 except models.Host.DoesNotExist:
430 host_attributes = None
431 if host_attributes and host_attributes.has_chromeos_firmware:
Dan Shi0f466e82013-02-22 15:44:58 -0800432 # Wait until tries == 0 and success, or until timeout.
433 utils.poll_for_condition(
434 lambda: (updater.get_kernel_tries(new_active_kernel) == 0
435 and updater.get_kernel_success(new_active_kernel)),
436 exception=autoupdater.ChromiumOSError(
437 'Update failed. Timed out waiting for system to mark'
438 ' new kernel as successful.'),
439 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
440
441
Scott Zawalskieadbf702013-03-14 09:23:06 -0400442 def _stage_build_and_return_update_url(self, image_name):
443 """Stage a build on a devserver and return the update_url.
444
445 @param image_name: a name like lumpy-release/R27-3837.0.0
446 @returns an update URL like:
447 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
448 """
449 logging.info('Staging requested build: %s', image_name)
450 devserver = dev_server.ImageServer.resolve(image_name)
451 devserver.trigger_download(image_name, synchronous=False)
452 return tools.image_url_pattern() % (devserver.url(), image_name)
453
454
    def machine_install(self, update_url=None, force_update=False,
                        local_devserver=False, repair=False):
        """Install the DUT.

        Use stateful update if the DUT is already running the same build.
        Stateful update does not update kernel and tends to run much faster
        than a full reimage. If the DUT is running a different build, or it
        failed to do a stateful update, full update, including kernel update,
        will be applied to the DUT.

        Once a host enters machine_install its cros_version label will be
        removed as well as its host attribute job_repo_url (used for
        package install).

        The update URL is chosen as follows: an explicit |update_url| wins;
        otherwise the `image` option of the autoserv parser is used (staged
        on a devserver unless it starts with 'http://'); otherwise, in
        repair mode, the stable repair image is staged.

        If the full update reports failure (run_update() returns a false
        value) no exception is raised here; the post-update verification
        and label update are skipped, but old autotest directories are
        still cleaned up.

        @param update_url: The url to use for the update
                pattern: http://$devserver:###/update/$build
                If update_url is None and repair is True we will install the
                stable image listed in global_config under
                CROS.stable_cros_version.
        @param force_update: Force an update even if the version installed
                is the same. Default:False
        @param local_devserver: Used by run_remote_test to allow people to
                use their local devserver. Default: False
        @param repair: Whether or not we are in repair mode. This adds special
                cases for repairing a machine like starting update_engine.
                Setting repair to True sets force_update to True as well.
                default: False
        @raises autoupdater.ChromiumOSError

        """
        # Work out which image to install.
        if not update_url and self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
            else:
                # Try to stage any build that does not start with http:// on
                # the devservers defined in global_config.ini.
                update_url = self._stage_build_and_return_update_url(
                        requested_build)
        elif not update_url and not repair:
            raise autoupdater.ChromiumOSError(
                'Update failed. No update URL provided.')
        elif not update_url and repair:
            update_url = self._stage_build_and_return_update_url(
                    self.get_repair_image_name())

        if repair:
            # In case the system is in a bad state, we always reboot the machine
            # before machine_install.
            self.reboot(timeout=60, wait=True)
            self.run('stop update-engine; start update-engine')
            force_update = True

        updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                                local_devserver=local_devserver)
        updated = False
        # Remove cros-version and job_repo_url host attribute from host.
        self.clear_cros_version_labels_and_job_repo_url()
        # If the DUT is already running the same build, try stateful update
        # first. Stateful update does not update kernel and tends to run much
        # faster than a full reimage.
        # Any exception from the stateful attempt is logged and swallowed so
        # that the full update below still runs.
        try:
            updated = self._try_stateful_update(update_url, force_update,
                                                updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as e:
            logging.exception(e)
            logging.warn('Failed to stateful update DUT, force to update.')

        # Stays None when the stateful path succeeded, in which case
        # _post_update_processing() skips the active-kernel check.
        inactive_kernel = None
        # Do a full update if stateful update is not applicable or failed.
        if not updated:
            # In case the system is in a bad state, we always reboot the
            # machine before machine_install.
            # (In repair mode the host was already rebooted once above.)
            self.reboot(timeout=60, wait=True)

            # TODO(sosa): Remove temporary hack to get rid of bricked machines
            # that can't update due to a corrupted policy.
            self.run('rm -rf /var/lib/whitelist')
            self.run('touch /var/lib/whitelist')
            self.run('chmod -w /var/lib/whitelist')
            self.run('stop update-engine; start update-engine')

            if updater.run_update(force_update):
                updated = True
                # Figure out active and inactive kernel.
                active_kernel, inactive_kernel = updater.get_kernel_state()

                # Ensure inactive kernel has higher priority than active.
                if (updater.get_kernel_priority(inactive_kernel)
                        < updater.get_kernel_priority(active_kernel)):
                    raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

                update_engine_log = '/var/log/update_engine.log'
                logging.info('Dumping %s', update_engine_log)
                self.run('cat %s' % update_engine_log)
                # Updater has returned successfully; reboot the host.
                self.reboot(timeout=60, wait=True)

        if updated:
            # Verify the install, then record the new version in the AFE.
            self._post_update_processing(updater, inactive_kernel)
            image_name = autoupdater.url_to_image_name(update_url)
            self.add_cros_version_labels_and_job_repo_url(image_name)

        # Clean up any old autotest directories which may be lying around.
        for path in global_config.global_config.get_config_value(
                'AUTOSERV', 'client_autodir_paths', type=list):
            self.run('rm -rf ' + path)
567
568
Simran Basi833814b2013-01-29 13:13:43 -0800569 def _get_label_from_afe(self, label_prefix):
570 """Retrieve a host's specific label from the AFE.
571
572 Looks for a host label that has the form <label_prefix>:<value>
573 and returns the "<value>" part of the label. None is returned
574 if there is not a label matching the pattern
575
576 @returns the label that matches the prefix or 'None'
577 """
578 host_model = models.Host.objects.get(hostname=self.hostname)
579 host_label = host_model.labels.get(name__startswith=label_prefix)
580 if not host_label:
581 return None
582 return host_label.name.split(label_prefix, 1)[1]
583
584
Richard Barnette82c35912012-11-20 10:09:10 -0800585 def _get_board_from_afe(self):
586 """Retrieve this host's board from its labels in the AFE.
587
588 Looks for a host label of the form "board:<board>", and
589 returns the "<board>" part of the label. `None` is returned
590 if there is not a single, unique label matching the pattern.
591
592 @returns board from label, or `None`.
593 """
Simran Basi833814b2013-01-29 13:13:43 -0800594 return self._get_label_from_afe(ds_constants.BOARD_PREFIX)
595
596
597 def get_build(self):
598 """Retrieve the current build for this Host from the AFE.
599
600 Looks through this host's labels in the AFE to determine its build.
601
602 @returns The current build or None if it could not find it or if there
603 were multiple build labels assigned to this host.
604 """
605 return self._get_label_from_afe(ds_constants.VERSION_PREFIX)
Richard Barnette82c35912012-11-20 10:09:10 -0800606
607
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500608 def _install_repair(self):
609 """Attempt to repair this host using upate-engine.
610
611 If the host is up, try installing the DUT with a stable
612 "repair" version of Chrome OS as defined in the global_config
613 under CROS.stable_cros_version.
614
615 @returns True if successful, False if update_engine failed.
616
617 """
618 if not self.is_up():
619 return False
620
621 logging.info('Attempting to reimage machine to repair image.')
622 try:
623 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700624 except autoupdater.ChromiumOSError as e:
625 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500626 logging.info('Repair via install failed.')
627 return False
628
629 return True
630
631
def _servo_repair(self, board):
    """Attempt to repair this host using an attached Servo.

    The OS is re-installed on the DUT in three steps:
      1. install a test image on the USB storage device attached to
         the Servo board,
      2. boot that image in recovery mode, and
      3. run the installer from the booted image.
    Afterwards the DUT is powered off and on again via the servo and
    is expected to boot the freshly installed image.

    @param board Board name used to select the devserver and to
                 build the image URL.

    @exception AutoservError The DUT did not come up from USB, or
                             did not reboot into the installed image,
                             within the allowed time.

    """
    devserver_url = dev_server.ImageServer.devserver_url_for_servo(board)
    image_path = self._DEFAULT_SERVO_URL_FORMAT % {'board': board}
    self.servo.install_recovery_image(devserver_url + image_path)

    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservError('DUT failed to boot from USB'
                                  ' after %d seconds' %
                                  self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)

    # Power the DUT down, switch the USB mux so that the servo (not
    # the DUT) sees the USB key, then power back up.
    self.servo.power_long_press()
    self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
    self.servo.power_short_press()

    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
658
659
Richard Barnette82c35912012-11-20 10:09:10 -0800660 def _powercycle_to_repair(self):
661 """Utilize the RPM Infrastructure to bring the host back up.
662
663 If the host is not up/repaired after the first powercycle we utilize
664 auto fallback to the last good install by powercycling and rebooting the
665 host 6 times.
666 """
667 logging.info('Attempting repair via RPM powercycle.')
668 failed_cycles = 0
669 self.power_cycle()
670 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
671 failed_cycles += 1
672 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
673 raise error.AutoservError('Powercycled host %s %d times; '
674 'device did not come back online.' %
675 (self.hostname, failed_cycles))
676 self.power_cycle()
677 if failed_cycles == 0:
678 logging.info('Powercycling was successful first time.')
679 else:
680 logging.info('Powercycling was successful after %d failures.',
681 failed_cycles)
682
683
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If that fails and there's a servo for the DUT (and the
         board is in `_SERVO_REPAIR_WHITELIST`), re-install via
         the servo.
      3. Otherwise, if the DUT can be power-cycled via RPM (and the
         board is in `_RPM_RECOVERY_BOARDS`), try to repair by
         power-cycling.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.

    @exception AutoservError The host has no board label in the AFE,
                             or auto-update failed and neither servo
                             nor RPM repair is available for it.

    """
    host_board = self._get_board_from_afe()
    if host_board is None:
        logging.error('host %s has no board; failing repair',
                      self.hostname)
        # There is no exception in flight at this point, so a bare
        # `raise` would itself blow up with an unrelated error; raise
        # an explicit repair failure instead.
        raise error.AutoservError('host %s has no board; failing repair' %
                                  self.hostname)

    if not self._install_repair():
        # TODO(scottz): All repair pathways should be
        # executed until we've exhausted all options. Below
        # we favor servo over powercycle when we really
        # should be falling back to power if servo fails.
        if (self.servo and
                host_board in self._SERVO_REPAIR_WHITELIST):
            self._servo_repair(host_board)
        elif (self.has_power() and
                host_board in self._RPM_RECOVERY_BOARDS):
            self._powercycle_to_repair()
        else:
            logging.error('host %s has no servo and no RPM control; '
                          'failing repair', self.hostname)
            raise error.AutoservError(
                    'host %s has no servo and no RPM control; '
                    'failing repair' % self.hostname)
    self.verify()
Richard Barnette82c35912012-11-20 10:09:10 -0800728
729
def close(self):
    """Close this host, then drop every XMLRPC connection to it.

    The parent class `close()` runs first; afterwards all XMLRPC
    servers and ssh tunnels recorded by `xmlrpc_connect()` are shut
    down via `xmlrpc_disconnect_all()`.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
733
734
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Nothing is done unless the AFE knows this host and the host
    carries the `_RPM_OUTLET_CHANGED` attribute. In that case power
    is switched on and the attribute is reset to `None` in the AFE.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
753
754
def cleanup(self):
    """Return the DUT to a clean state after a test.

    The "cleanup logs paused" marker file is removed and the UI is
    restarted, waiting for the login prompt to come back. If any of
    the UI steps fails, the parent class cleanup is used instead.
    Finally, on hosts with RPM support, `_cleanup_poweron()` is
    called to check whether the outlet was manipulated.
    """
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warn('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700772
773
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds. This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.

    @param dargs Keyword arguments forwarded to the parent class
                 `reboot()`. `reboot_cmd` and `fastsync` get Chrome
                 OS specific defaults when the caller does not
                 supply them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
789
790
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
      1. All conditions tested by the parent version of this
         function.
      2. Sufficient space in /mnt/stateful_partition.
      3. Sufficient space in /mnt/stateful_partition/encrypted.
      4. update_engine answers a simple status request over DBus.
      5. python on the DUT is able to import cPickle.

    The required amount of space (in GB) is read from the SERVER
    section of the global config, with built-in defaults.

    """
    super(SiteHost, self).verify_software()

    # (directory, config key, default number of GB required)
    space_requirements = (
        ('/mnt/stateful_partition',
         'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for directory, config_key, default_gb in space_requirements:
        required_gb = global_config.global_config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(directory, required_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700818
819
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`. The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy. This
    method should take no parameters and return successfully only
    when the server is ready to process client requests. When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready, and throw a TestError if the server isn't
    ready by timeout_seconds.

    Any connection previously made for `port` is disconnected
    first.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.' Will throw a
        TestError if server is not ready in time.

    @returns An `xmlrpclib.ServerProxy` that talks to the server
             through a local ssh tunnel.

    @exception TestError `ready_test_name` was given and the server
                         was not ready within `timeout_seconds`.

    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security. We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open. So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host. Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except Exception:
        # The tunnel is not yet recorded in _xmlrpc_proxy_map, so no
        # later disconnect would ever find it. Shut it down here
        # rather than leaking the ssh process, then re-raise.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
        raise
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error, xmlrpclib.ProtocolError),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        except retry.TimeoutException:
            raise error.TestError('Unable to start XMLRPC server after '
                                  '%d seconds.' % timeout_seconds)
        finally:
            # Covers the timeout as well as any exception that the
            # retry decorator does not handle.
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.xmlrpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700909
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`. Also closes the local ssh tunnel created
    for the connection to the host. This function does not
    directly alter the state of a previously returned XMLRPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    if port not in self._xmlrpc_proxy_map:
        return
    remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]

    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception. We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    tunnel_still_running = tunnel_proc.poll() is None
    if tunnel_still_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
950
951
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports.

    Calls `xmlrpc_disconnect()` for every port currently recorded
    in `self._xmlrpc_proxy_map`.
    """
    # xmlrpc_disconnect() removes the port from the map, so iterate
    # over a snapshot of the keys rather than over the live dict.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
956
957
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    A single ping with a one second deadline is sent; the host
    counts as answering when `utils.ping()` returns 0.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_exit_code = utils.ping(self.hostname, tries=1, deadline=1)
    host_answered = (ping_exit_code == 0)
    # XOR is true exactly when the two values disagree.
    return not (status ^ host_answered)
968
969 def _ping_wait_for_status(self, status, timeout):
970 """Wait for the host to have a given status (UP or DOWN).
971
972 Status is checked by polling. Polling will not last longer
973 than the number of seconds in `timeout`. The polling
974 interval will be long enough that only approximately
975 _PING_WAIT_COUNT polling cycles will be executed, subject
976 to a maximum interval of about one minute.
977
978 @param status Waiting will stop immediately if `ping` of the
979 host returns this status.
980 @param timeout Poll for at most this many seconds.
981 @return True iff the host status from `ping` matched the
982 requested status at the time of return.
983
984 """
985 # _ping_check_status() takes about 1 second, hence the
986 # "- 1" in the formula below.
987 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
988 end_time = time.time() + timeout
989 while time.time() <= end_time:
990 if self._ping_check_status(status):
991 return True
992 if poll_interval > 0:
993 time.sleep(poll_interval)
994
995 # The last thing we did was sleep(poll_interval), so it may
996 # have been too long since the last `ping`. Check one more
997 # time, just to be sure.
998 return self._ping_check_status(status)
999
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B. This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh. This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001015
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001029
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' %
        self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001056
1057
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode. A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    came_back = self.wait_up(timeout=self.RESUME_TIMEOUT)
    if not came_back:
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
            self.RESUME_TIMEOUT)

    # A changed boot id means the host went through a full reboot
    # instead of a sleep/resume cycle.
    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        return
    raise error.TestFail(
        'client rebooted, but sleep was expected'
        ' (old boot %s, new boot %s)'
        % (old_boot_id, new_boot_id))
1088
1089
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The test for "has shut down" can't distinguish a system that
    is merely asleep; to confirm that the unit was down, it is
    necessary to force boot, and then call test_wait_for_boot().

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' %
        self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001115
1116
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to shutting down. A
    `TestFail` exception is raised if the boot id does not
    change. The boot id test is omitted if `old_boot_id` is not
    specified.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_back = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_back:
        raise error.TestFail(
            'client failed to reboot after %d seconds' %
            self.REBOOT_TIMEOUT)

    # Without a reference boot id there is nothing to compare with.
    if not old_boot_id:
        return
    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
            'client is back up, but did not reboot'
            ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001146
1147
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    The decision is made purely by matching `hostname` against
    `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname Name of the host to check.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab. If it does follow the
            format, it returns a regular expression MatchObject instead.
    """
    rpm_hostname_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001157
1158
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return The result of `check_for_rpm_support()` for this host's
            name: a (truthy) regular expression MatchObject if the
            hostname follows the naming format of RPM powered DUT's
            in the lab, `None` otherwise.
    """
    hostname = self.hostname
    return SiteHost.check_for_rpm_support(hostname)
1166
1167
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001168 def _set_power(self, state, power_method):
1169 """Sets the power to the host via RPM, Servo or manual.
1170
1171 @param state Specifies which power state to set to DUT
1172 @param power_method Specifies which method of power control to
1173 use. By default "RPM" will be used. Valid values
1174 are the strings "RPM", "manual", "servoj10".
1175
1176 """
1177 ACCEPTABLE_STATES = ['ON', 'OFF']
1178
1179 if state.upper() not in ACCEPTABLE_STATES:
1180 raise error.TestError('State must be one of: %s.'
1181 % (ACCEPTABLE_STATES,))
1182
1183 if power_method == self.POWER_CONTROL_SERVO:
1184 logging.info('Setting servo port J10 to %s', state)
1185 self.servo.set('prtctl3_pwren', state.lower())
1186 time.sleep(self._USB_POWER_TIMEOUT)
1187 elif power_method == self.POWER_CONTROL_MANUAL:
1188 logging.info('You have %d seconds to set the AC power to %s.',
1189 self._POWER_CYCLE_TIMEOUT, state)
1190 time.sleep(self._POWER_CYCLE_TIMEOUT)
1191 else:
1192 if not self.has_power():
1193 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001194 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1195 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1196 hostname=self.hostname)
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001197 rpm_client.set_power(self.hostname, state.upper())
Simran Basid5e5e272012-09-24 15:23:59 -07001198
1199
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Thin wrapper around `_set_power()` with the state 'OFF'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001209
1210
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Thin wrapper around `_set_power()` with the state 'ON'.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1220
1221
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is done as an explicit
    power off, a pause of `_POWER_CYCLE_TIMEOUT` seconds, and a
    power on. For any other method a single 'CYCLE' request is sent
    to the RPM.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001237
1238
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id reported by
    crossystem on the host.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # Extract fwid value and use the leading part as the platform id.
    # fwid generally follow the format of {platform}.{firmware version}
    # Example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0]
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform_id.lower().replace('google_', '')
1252
1253
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board is read from CHROMEOS_RELEASE_BOARD in the host's
    /etc/lsb-release and prefixed with the board label prefix.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}. The board
    # name may also begin with 'x86-' which we need to keep.
    label_format = ds_constants.BOARD_PREFIX + '%s'
    name_parts = board.split('-')
    # Keep two components for 'x86' boards, one for everything else.
    kept_parts = 2 if 'x86' in board else 1
    return label_format % '-'.join(name_parts[:kept_parts])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001270
1271
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Searches `_LIGHTSENSOR_SEARCH_DIR` on the host (following
    symlinks, at most four levels deep) for any of the file names
    in `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        search_dir, file_pattern)
    try:
        # Run the search cmd following the symlinks. Stderr_tee is set to
        # None as there can be a symlink loop, but the command will still
        # execute correctly with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the possible
        # lightsensor files existed.
        return None
    else:
        return 'lightsensor'
1291
1292
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    else:
        # test exited with a return code of 0.
        return 'bluetooth'
1308
1309
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions.
    Every function in `_LABEL_FUNCTIONS` is called once with this
    host, in order; results that are empty or `None` are left out.

    @returns a list with the labels that apply to this host.
    """
    detected = (label_function(self)
                for label_function in self._LABEL_FUNCTIONS)
    return [label for label in detected if label]