blob: c7c14dab5ec85ee07076769f9acc01fba09c4634 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
J. Richard Barnette1d78b012012-05-15 13:56:30 -07006import logging
Dan Shi0f466e82013-02-22 15:44:58 -08007import os
Simran Basid5e5e272012-09-24 15:23:59 -07008import re
Christopher Wileyd78249a2013-03-01 13:05:31 -08009import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070010import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070011import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080015from autotest_lib.client.common_lib import error
16from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080018from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080019from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080020from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070022from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import site_host_attributes
Scott Zawalski89c44dd2013-02-26 09:28:02 -050024from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070025from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080026from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070027from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080028from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070029
Richard Barnette82c35912012-11-20 10:09:10 -080030# Importing frontend.afe.models requires a full Autotest
31# installation (with the Django modules), not just the source
32# repository. Most developers won't have the full installation, so
33# the imports below will fail for them.
34#
35# The fix is to catch import exceptions, and set `models` to `None`
36# on failure. This has the side effect that
37# SiteHost._get_board_from_afe() will fail: That will manifest as
38# failures during Repair jobs leaving the DUT as "Repair Failed".
39# In practice, you can't test Repair jobs without a full
40# installation, so that kind of failure isn't expected.
41try:
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080042 # pylint: disable=W0611
Richard Barnette82c35912012-11-20 10:09:10 -080043 from autotest_lib.frontend import setup_django_environment
44 from autotest_lib.frontend.afe import models
45except:
46 models = None
47
Simran Basid5e5e272012-09-24 15:23:59 -070048
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080049def _make_servo_hostname(hostname):
50 host_parts = hostname.split('.')
51 host_parts[0] = host_parts[0] + '-servo'
52 return '.'.join(host_parts)
53
54
55def _get_lab_servo(target_hostname):
56 """Instantiate a Servo for |target_hostname| in the lab.
57
58 Assuming that |target_hostname| is a device in the CrOS test
59 lab, create and return a Servo object pointed at the servo
60 attached to that DUT. The servo in the test lab is assumed
61 to already have servod up and running on it.
62
63 @param target_hostname: device whose servo we want to target.
64 @return an appropriately configured Servo instance.
65 """
66 servo_host = _make_servo_hostname(target_hostname)
67 if utils.host_is_in_lab_zone(servo_host):
68 try:
J. Richard Barnetted5f807a2013-02-11 16:51:00 -080069 return servo.Servo(servo_host=servo_host)
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080070 except: # pylint: disable=W0702
71 # TODO(jrbarnette): Long-term, if we can't get to
72 # a servo in the lab, we want to fail, so we should
73 # pass any exceptions along. Short-term, we're not
74 # ready to rely on servo, so we ignore failures.
75 pass
76 return None
77
78
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070079def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
80 connect_timeout=None, alive_interval=None):
81 """Override default make_ssh_command to use options tuned for Chrome OS.
82
83 Tuning changes:
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070084 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH connection
85 failure. Consistency with remote_access.sh.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070086
Dale Curtisaa5eedb2011-08-23 16:18:52 -070087 - ServerAliveInterval=180; which causes SSH to ping connection every
88 180 seconds. In conjunction with ServerAliveCountMax ensures that if the
89 connection dies, Autotest will bail out quickly. Originally tried 60 secs,
90 but saw frequent job ABORTS where the test completed successfully.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070091
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070092 - ServerAliveCountMax=3; consistency with remote_access.sh.
93
94 - ConnectAttempts=4; reduce flakiness in connection errors; consistency
95 with remote_access.sh.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070096
97 - UserKnownHostsFile=/dev/null; we don't care about the keys. Host keys
98 change with every new installation, don't waste memory/space saving them.
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070099
100 - SSH protocol forced to 2; needed for ServerAliveInterval.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800101
102 @param user User name to use for the ssh connection.
103 @param port Port on the target host to use for ssh connection.
104 @param opts Additional options to the ssh command.
105 @param hosts_file Ignored.
106 @param connect_timeout Ignored.
107 @param alive_interval Ignored.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -0700108 """
109 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
110 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
Chris Sosaf7fcd6e2011-09-27 17:30:47 -0700111 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
112 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
113 ' -o Protocol=2 -l %s -p %d')
Dale Curtiscb7bfaf2011-06-07 16:21:57 -0700114 return base_command % (opts, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700115
116
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800117
Aviv Keshet74c89a92013-02-04 15:18:30 -0800118def add_label_detector(label_function_list, label_list=None, label=None):
119 """Decorator used to group functions together into the provided list.
120 @param label_function_list: List of label detecting functions to add
121 decorated function to.
122 @param label_list: List of detectable labels to add detectable labels to.
123 (Default: None)
124 @param label: Label string that is detectable by this detection function
125 (Default: None)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800126 """
Simran Basic6f1f7a2012-10-16 10:47:46 -0700127 def add_func(func):
Aviv Keshet74c89a92013-02-04 15:18:30 -0800128 """
129 @param func: The function to be added as a detector.
130 """
131 label_function_list.append(func)
132 if label and label_list is not None:
133 label_list.append(label)
Simran Basic6f1f7a2012-10-16 10:47:46 -0700134 return func
135 return add_func
136
137
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700138class SiteHost(remote.RemoteHost):
139 """Chromium OS specific subclass of Host."""
140
141 _parser = autoserv_parser.autoserv_parser
142
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800143 # Time to wait for new kernel to be marked successful after
144 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700145 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700146
Richard Barnette03a0c132012-11-05 12:40:35 -0800147 # Timeout values (in seconds) associated with various Chrome OS
148 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700149 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800150 # In general, a good rule of thumb is that the timeout can be up
151 # to twice the typical measured value on the slowest platform.
152 # The times here have not necessarily been empirically tested to
153 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700154 #
155 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800156 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
157 # time to restart the netwowrk.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700158 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800159 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800160 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700161 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800162 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800163 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800164 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700165 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
J. Richard Barnettecb934ac2013-04-01 14:27:54 -0700166 # _UPDATE_REBOOT_TIMEOUT: Time to allow for reboot after AU; this
167 # time provides no allowance for the 30 second dev-mode delay,
168 # but is deliberately generous to avoid try-job failures.
Richard Barnette03a0c132012-11-05 12:40:35 -0800169 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700170
171 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800172 RESUME_TIMEOUT = 10
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700173 BOOT_TIMEOUT = 45
174 USB_BOOT_TIMEOUT = 150
175 SHUTDOWN_TIMEOUT = 5
176 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
J. Richard Barnettecb934ac2013-04-01 14:27:54 -0700177 # TODO(jrbarnette) - temporarily set this value to 2 min to allow
178 # for http://crbug.com/224871. Reset to 1 minute once that bug
179 # is fixed.
180 _UPDATE_REBOOT_TIMEOUT = 120
Richard Barnette03a0c132012-11-05 12:40:35 -0800181 _INSTALL_TIMEOUT = 240
182
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800183 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
184 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
185 _USB_POWER_TIMEOUT = 5
186 _POWER_CYCLE_TIMEOUT = 10
187
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800188
Richard Barnette82c35912012-11-20 10:09:10 -0800189 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
190 'rpm_recovery_boards', type=str).split(',')
191
192 _MAX_POWER_CYCLE_ATTEMPTS = 6
193 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
194 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
195 'host[0-9]+')
196 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
197 'in_illuminance0_raw',
198 'illuminance0_input']
199 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
200 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800201 _DETECTABLE_LABELS = []
202 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
203 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700204
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800205 # Constants used in ping_wait_up() and ping_wait_down().
206 #
207 # _PING_WAIT_COUNT is the approximate number of polling
208 # cycles to use when waiting for a host state change.
209 #
210 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
211 # for arguments to the internal _ping_wait_for_status()
212 # method.
213 _PING_WAIT_COUNT = 40
214 _PING_STATUS_DOWN = False
215 _PING_STATUS_UP = True
216
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800217 # Allowed values for the power_method argument.
218
219 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
220 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
221 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
222 POWER_CONTROL_RPM = 'RPM'
223 POWER_CONTROL_SERVO = 'servoj10'
224 POWER_CONTROL_MANUAL = 'manual'
225
226 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
227 POWER_CONTROL_SERVO,
228 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800229
Simran Basi5e6339a2013-03-21 11:34:32 -0700230 _RPM_OUTLET_CHANGED = 'outlet_changed'
231
J. Richard Barnette964fba02012-10-24 17:34:29 -0700232 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800233 def get_servo_arguments(args_dict):
234 """Extract servo options from `args_dict` and return the result.
235
236 Take the provided dictionary of argument options and return
237 a subset that represent standard arguments needed to
238 construct a servo object for a host. The intent is to
239 provide standard argument processing from run_remote_tests
240 for tests that require a servo to operate.
241
242 Recommended usage:
243 ~~~~~~~~
244 args_dict = utils.args_to_dict(args)
245 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
246 host = hosts.create_host(machine, servo_args=servo_args)
247 ~~~~~~~~
248
249 @param args_dict Dictionary from which to extract the servo
250 arguments.
251 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700252 servo_args = {}
253 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800254 if arg in args_dict:
255 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700256 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700257
J. Richard Barnette964fba02012-10-24 17:34:29 -0700258
259 def _initialize(self, hostname, servo_args=None, *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700260 """Initialize superclasses, and |self.servo|.
261
262 For creating the host servo object, there are three
263 possibilities: First, if the host is a lab system known to
264 have a servo board, we connect to that servo unconditionally.
265 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700266 servo features for testing, it will pass settings for
267 `servo_host`, `servo_port`, or both. If neither of these
268 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700269
270 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700271 super(SiteHost, self)._initialize(hostname=hostname,
272 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700273 # self.env is a dictionary of environment variable settings
274 # to be exported for commands run on the host.
275 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
276 # errors that might happen.
277 self.env['LIBC_FATAL_STDERR_'] = '1'
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700278 self._xmlrpc_proxy_map = {}
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800279 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700280 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700281 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700282
283
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500284 def get_repair_image_name(self):
285 """Generate a image_name from variables in the global config.
286
287 @returns a str of $board-version/$BUILD.
288
289 """
290 stable_version = global_config.global_config.get_config_value(
291 'CROS', 'stable_cros_version')
292 build_pattern = global_config.global_config.get_config_value(
293 'CROS', 'stable_build_pattern')
294 board = self._get_board_from_afe()
295 if board is None:
296 raise error.AutoservError('DUT has no board attribute, '
297 'cannot be repaired.')
298 return build_pattern % (board, stable_version)
299
300
301 def clear_cros_version_labels_and_job_repo_url(self):
302 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalskieadbf702013-03-14 09:23:06 -0400303 try:
304 host_model = models.Host.objects.get(hostname=self.hostname)
305 except models.Host.DoesNotExist:
306 return
307
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500308 for label in host_model.labels.iterator():
309 if not label.name.startswith(ds_constants.VERSION_PREFIX):
310 continue
Dan Shi0f466e82013-02-22 15:44:58 -0800311
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500312 label.host_set.remove(host_model)
313
314 host_model.set_or_delete_attribute('job_repo_url', None)
315
316
Scott Zawalskieadbf702013-03-14 09:23:06 -0400317 def add_cros_version_labels_and_job_repo_url(self, image_name):
318 """Add cros_version labels and host attribute job_repo_url.
319
320 @param image_name: The name of the image e.g.
321 lumpy-release/R27-3837.0.0
322 """
323 try:
324 host_model = models.Host.objects.get(hostname=self.hostname)
325 except models.Host.DoesNotExist:
326 return
327 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
328 devserver_url = dev_server.ImageServer.resolve(image_name).url()
329 try:
330 label_model = models.Label.objects.get(name=cros_label)
331 except models.Label.DoesNotExist:
332 label_model = models.Label.objects.create(name=cros_label)
333 host_model.labels.add(label_model)
334 repo_url = tools.get_package_url(devserver_url, image_name)
335 host_model.set_or_delete_attribute('job_repo_url', repo_url)
336
337
Dan Shi0f466e82013-02-22 15:44:58 -0800338 def _try_stateful_update(self, update_url, force_update, updater):
339 """Try to use stateful update to initialize DUT.
340
341 When DUT is already running the same version that machine_install
342 tries to install, stateful update is a much faster way to clean up
343 the DUT for testing, compared to a full reimage. It is implemeted
344 by calling autoupdater.run_update, but skipping updating root, as
345 updating the kernel is time consuming and not necessary.
346
347 @param update_url: url of the image.
348 @param force_update: Set to True to update the image even if the DUT
349 is running the same version.
350 @param updater: ChromiumOSUpdater instance used to update the DUT.
351 @returns: True if the DUT was updated with stateful update.
352
353 """
354 if not updater.check_version():
355 return False
356 if not force_update:
357 logging.info('Canceling stateful update because the new and '
358 'old versions are the same.')
359 return False
360 # Following folders should be rebuilt after stateful update.
361 # A test file is used to confirm each folder gets rebuilt after
362 # the stateful update.
363 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
364 test_file = '.test_file_to_be_deleted'
365 for folder in folders_to_check:
366 touch_path = os.path.join(folder, test_file)
367 self.run('touch %s' % touch_path)
368
369 if not updater.run_update(force_update=True, update_root=False):
370 return False
371
372 # Reboot to complete stateful update.
J. Richard Barnettecb934ac2013-04-01 14:27:54 -0700373 self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800374 check_file_cmd = 'test -f %s; echo $?'
375 for folder in folders_to_check:
376 test_file_path = os.path.join(folder, test_file)
377 result = self.run(check_file_cmd % test_file_path,
378 ignore_status=True)
379 if result.exit_status == 1:
380 return False
381 return True
382
383
384 def _post_update_processing(self, updater, inactive_kernel=None):
385 """After the DUT is updated, confirm machine_install succeeded.
386
387 @param updater: ChromiumOSUpdater instance used to update the DUT.
388 @param inactive_kernel: kernel state of inactive kernel before reboot.
389
390 """
391
392 # Touch the lab machine file to leave a marker that distinguishes
393 # this image from other test images.
394 self.run('touch %s' % self._LAB_MACHINE_FILE)
395
396 # Kick off the autoreboot script as the _LAB_MACHINE_FILE was
397 # missing on the first boot.
398 self.run('start autoreboot')
399
400 # Following the reboot, verify the correct version.
Dan Shib95bb862013-03-22 16:29:28 -0700401 if not updater.check_version_to_confirm_install():
Dan Shi0f466e82013-02-22 15:44:58 -0800402 # Print out crossystem to make it easier to debug the rollback.
403 logging.debug('Dumping partition table.')
Dan Shi346725f2013-03-20 15:22:38 -0700404 self.run('cgpt show $(rootdev -s -d)')
Dan Shi0f466e82013-02-22 15:44:58 -0800405 logging.debug('Dumping crossystem for firmware debugging.')
Dan Shi346725f2013-03-20 15:22:38 -0700406 self.run('crossystem --all')
Dan Shi0f466e82013-02-22 15:44:58 -0800407 logging.error('Expected Chromium OS version: %s. '
408 'Found Chromium OS %s',
Dan Shi346725f2013-03-20 15:22:38 -0700409 updater.update_version, updater.get_build_id())
410 raise autoupdater.ChromiumOSError('Updater failed on host %s' %
411 self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800412
413 # Figure out newly active kernel.
414 new_active_kernel, _ = updater.get_kernel_state()
415
416 # Ensure that previously inactive kernel is now the active kernel.
417 if inactive_kernel and new_active_kernel != inactive_kernel:
418 raise autoupdater.ChromiumOSError(
419 'Update failed. New kernel partition is not active after'
420 ' boot.')
421
Scott Zawalskieadbf702013-03-14 09:23:06 -0400422 try:
423 host_attributes = site_host_attributes.HostAttributes(self.hostname)
424 except models.Host.DoesNotExist:
425 host_attributes = None
426 if host_attributes and host_attributes.has_chromeos_firmware:
Dan Shi0f466e82013-02-22 15:44:58 -0800427 # Wait until tries == 0 and success, or until timeout.
428 utils.poll_for_condition(
429 lambda: (updater.get_kernel_tries(new_active_kernel) == 0
430 and updater.get_kernel_success(new_active_kernel)),
431 exception=autoupdater.ChromiumOSError(
432 'Update failed. Timed out waiting for system to mark'
433 ' new kernel as successful.'),
434 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
435
436
Scott Zawalskieadbf702013-03-14 09:23:06 -0400437 def _stage_build_and_return_update_url(self, image_name):
438 """Stage a build on a devserver and return the update_url.
439
440 @param image_name: a name like lumpy-release/R27-3837.0.0
441 @returns an update URL like:
442 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
443 """
444 logging.info('Staging requested build: %s', image_name)
445 devserver = dev_server.ImageServer.resolve(image_name)
446 devserver.trigger_download(image_name, synchronous=False)
447 return tools.image_url_pattern() % (devserver.url(), image_name)
448
449
Chris Sosaa3ac2152012-05-23 22:23:13 -0700450 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500451 local_devserver=False, repair=False):
452 """Install the DUT.
453
Dan Shi0f466e82013-02-22 15:44:58 -0800454 Use stateful update if the DUT is already running the same build.
455 Stateful update does not update kernel and tends to run much faster
456 than a full reimage. If the DUT is running a different build, or it
457 failed to do a stateful update, full update, including kernel update,
458 will be applied to the DUT.
459
Scott Zawalskieadbf702013-03-14 09:23:06 -0400460 Once a host enters machine_install its cros_version label will be
461 removed as well as its host attribute job_repo_url (used for
462 package install).
463
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500464 @param update_url: The url to use for the update
465 pattern: http://$devserver:###/update/$build
466 If update_url is None and repair is True we will install the
467 stable image listed in global_config under
468 CROS.stable_cros_version.
469 @param force_update: Force an update even if the version installed
470 is the same. Default:False
471 @param local_devserver: Used by run_remote_test to allow people to
472 use their local devserver. Default: False
473 @param repair: Whether or not we are in repair mode. This adds special
474 cases for repairing a machine like starting update_engine.
475 Setting repair to True sets force_update to True as well.
476 default: False
477 @raises autoupdater.ChromiumOSError
478
479 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700480 if not update_url and self._parser.options.image:
Scott Zawalskieadbf702013-03-14 09:23:06 -0400481 requested_build = self._parser.options.image
482 if requested_build.startswith('http://'):
483 update_url = requested_build
484 else:
485 # Try to stage any build that does not start with http:// on
486 # the devservers defined in global_config.ini.
487 update_url = self._stage_build_and_return_update_url(
488 requested_build)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500489 elif not update_url and not repair:
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700490 raise autoupdater.ChromiumOSError(
491 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500492 elif not update_url and repair:
Scott Zawalskieadbf702013-03-14 09:23:06 -0400493 update_url = self._stage_build_and_return_update_url(
494 self.get_repair_image_name())
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500495
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500496 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800497 # In case the system is in a bad state, we always reboot the machine
498 # before machine_install.
J. Richard Barnettecb934ac2013-04-01 14:27:54 -0700499 self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500500 self.run('stop update-engine; start update-engine')
501 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800502
Chris Sosaa3ac2152012-05-23 22:23:13 -0700503 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Dan Shi0f466e82013-02-22 15:44:58 -0800504 local_devserver=local_devserver)
505 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400506 # Remove cros-version and job_repo_url host attribute from host.
507 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800508 # If the DUT is already running the same build, try stateful update
509 # first. Stateful update does not update kernel and tends to run much
510 # faster than a full reimage.
511 try:
512 updated = self._try_stateful_update(update_url, force_update,
513 updater)
514 if updated:
515 logging.info('DUT is updated with stateful update.')
516 except Exception as e:
517 logging.exception(e)
518 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700519
Dan Shi0f466e82013-02-22 15:44:58 -0800520 inactive_kernel = None
521 # Do a full update if stateful update is not applicable or failed.
522 if not updated:
523 # In case the system is in a bad state, we always reboot the
524 # machine before machine_install.
J. Richard Barnettecb934ac2013-04-01 14:27:54 -0700525 self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT, wait=True)
Chris Sosab7612bc2013-03-21 10:32:37 -0700526
527 # TODO(sosa): Remove temporary hack to get rid of bricked machines
528 # that can't update due to a corrupted policy.
529 self.run('rm -rf /var/lib/whitelist')
530 self.run('touch /var/lib/whitelist')
531 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400532 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700533
Dan Shi0f466e82013-02-22 15:44:58 -0800534 if updater.run_update(force_update):
535 updated = True
536 # Figure out active and inactive kernel.
537 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700538
Dan Shi0f466e82013-02-22 15:44:58 -0800539 # Ensure inactive kernel has higher priority than active.
540 if (updater.get_kernel_priority(inactive_kernel)
541 < updater.get_kernel_priority(active_kernel)):
542 raise autoupdater.ChromiumOSError(
543 'Update failed. The priority of the inactive kernel'
544 ' partition is less than that of the active kernel'
545 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700546
Dan Shi0f466e82013-02-22 15:44:58 -0800547 update_engine_log = '/var/log/update_engine.log'
548 logging.info('Dumping %s', update_engine_log)
549 self.run('cat %s' % update_engine_log)
550 # Updater has returned successfully; reboot the host.
J. Richard Barnettecb934ac2013-04-01 14:27:54 -0700551 self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT,
552 wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700553
Dan Shi0f466e82013-02-22 15:44:58 -0800554 if updated:
555 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400556 image_name = autoupdater.url_to_image_name(update_url)
557 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800558
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700559 # Clean up any old autotest directories which may be lying around.
560 for path in global_config.global_config.get_config_value(
561 'AUTOSERV', 'client_autodir_paths', type=list):
562 self.run('rm -rf ' + path)
563
564
Simran Basi833814b2013-01-29 13:13:43 -0800565 def _get_label_from_afe(self, label_prefix):
566 """Retrieve a host's specific label from the AFE.
567
568 Looks for a host label that has the form <label_prefix>:<value>
569 and returns the "<value>" part of the label. None is returned
570 if there is not a label matching the pattern
571
572 @returns the label that matches the prefix or 'None'
573 """
574 host_model = models.Host.objects.get(hostname=self.hostname)
575 host_label = host_model.labels.get(name__startswith=label_prefix)
576 if not host_label:
577 return None
578 return host_label.name.split(label_prefix, 1)[1]
579
580
Richard Barnette82c35912012-11-20 10:09:10 -0800581 def _get_board_from_afe(self):
582 """Retrieve this host's board from its labels in the AFE.
583
584 Looks for a host label of the form "board:<board>", and
585 returns the "<board>" part of the label. `None` is returned
586 if there is not a single, unique label matching the pattern.
587
588 @returns board from label, or `None`.
589 """
Simran Basi833814b2013-01-29 13:13:43 -0800590 return self._get_label_from_afe(ds_constants.BOARD_PREFIX)
591
592
593 def get_build(self):
594 """Retrieve the current build for this Host from the AFE.
595
596 Looks through this host's labels in the AFE to determine its build.
597
598 @returns The current build or None if it could not find it or if there
599 were multiple build labels assigned to this host.
600 """
601 return self._get_label_from_afe(ds_constants.VERSION_PREFIX)
Richard Barnette82c35912012-11-20 10:09:10 -0800602
603
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500604 def _install_repair(self):
605 """Attempt to repair this host using upate-engine.
606
607 If the host is up, try installing the DUT with a stable
608 "repair" version of Chrome OS as defined in the global_config
609 under CROS.stable_cros_version.
610
611 @returns True if successful, False if update_engine failed.
612
613 """
614 if not self.is_up():
615 return False
616
617 logging.info('Attempting to reimage machine to repair image.')
618 try:
619 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700620 except autoupdater.ChromiumOSError as e:
621 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500622 logging.info('Repair via install failed.')
623 return False
624
625 return True
626
627
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700628 def servo_repair(self, image_url):
Richard Barnette03a0c132012-11-05 12:40:35 -0800629 """Attempt to repair this host using an attached Servo.
630
631 Re-install the OS on the DUT by 1) installing a test image
632 on a USB storage device attached to the Servo board,
633 2) booting that image in recovery mode, and then
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700634 3) installing the image with chromeos-install.
635
636 @param image_url URL from which to download the test image to
637 be installed the DUT.
Richard Barnette03a0c132012-11-05 12:40:35 -0800638
639 """
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700640 self.servo.install_recovery_image(image_url)
Richard Barnette03a0c132012-11-05 12:40:35 -0800641 if not self.wait_up(timeout=self.USB_BOOT_TIMEOUT):
642 raise error.AutoservError('DUT failed to boot from USB'
643 ' after %d seconds' %
644 self.USB_BOOT_TIMEOUT)
645 self.run('chromeos-install --yes',
646 timeout=self._INSTALL_TIMEOUT)
647 self.servo.power_long_press()
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700648 self.servo.switch_usbkey('host')
Richard Barnette03a0c132012-11-05 12:40:35 -0800649 self.servo.power_short_press()
650 if not self.wait_up(timeout=self.BOOT_TIMEOUT):
651 raise error.AutoservError('DUT failed to reboot installed '
652 'test image after %d seconds' %
653 self.BOOT_TIMEOUT)
654
655
Richard Barnette82c35912012-11-20 10:09:10 -0800656 def _powercycle_to_repair(self):
657 """Utilize the RPM Infrastructure to bring the host back up.
658
659 If the host is not up/repaired after the first powercycle we utilize
660 auto fallback to the last good install by powercycling and rebooting the
661 host 6 times.
662 """
663 logging.info('Attempting repair via RPM powercycle.')
664 failed_cycles = 0
665 self.power_cycle()
666 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
667 failed_cycles += 1
668 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
669 raise error.AutoservError('Powercycled host %s %d times; '
670 'device did not come back online.' %
671 (self.hostname, failed_cycles))
672 self.power_cycle()
673 if failed_cycles == 0:
674 logging.info('Powercycling was successful first time.')
675 else:
676 logging.info('Powercycling was successful after %d failures.',
677 failed_cycles)
678
679
680 def repair_full(self):
681 """Repair a host for repair level NO_PROTECTION.
682
683 This overrides the base class function for repair; it does
684 not call back to the parent class, but instead offers a
685 simplified implementation based on the capabilities in the
686 Chrome OS test lab.
687
J. Richard Barnettefde55fc2013-03-15 17:47:01 -0700688 If `self.verify()` fails, the following procedures are
689 attempted:
690 1. Try to re-install to a known stable image using
691 auto-update.
692 2. If there's a servo for the DUT, try to re-install via
693 the servo.
694 3. If the DUT can be power-cycled via RPM, try to repair
Richard Barnette82c35912012-11-20 10:09:10 -0800695 by power-cycling.
696
697 As with the parent method, the last operation performed on
698 the DUT must be to call `self.verify()`; if that call fails,
699 the exception it raises is passed back to the caller.
J. Richard Barnettefde55fc2013-03-15 17:47:01 -0700700
Richard Barnette82c35912012-11-20 10:09:10 -0800701 """
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400702 host_board = self._get_board_from_afe()
703 if host_board is None:
704 logging.error('host %s has no board; failing repair',
705 self.hostname)
706 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500707
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400708 if not self._install_repair():
709 # TODO(scottz): All repair pathways should be
710 # executed until we've exhausted all options. Below
711 # we favor servo over powercycle when we really
712 # should be falling back to power if servo fails.
J. Richard Barnette69929a52013-03-15 13:22:11 -0700713 if (self.servo and self.servo.recovery_supported()):
J. Richard Barnette31b2e312013-04-04 16:05:22 -0700714 self.servo_repair(
715 dev_server.ImageServer.devserver_url_for_servo(host_board))
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400716 elif (self.has_power() and
717 host_board in self._RPM_RECOVERY_BOARDS):
718 self._powercycle_to_repair()
719 else:
720 logging.error('host %s has no servo and no RPM control; '
721 'failing repair', self.hostname)
722 raise
723 self.verify()
Richard Barnette82c35912012-11-20 10:09:10 -0800724
725
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700726 def close(self):
727 super(SiteHost, self).close()
728 self.xmlrpc_disconnect_all()
729
730
Simran Basi5e6339a2013-03-21 11:34:32 -0700731 def _cleanup_poweron(self):
732 """Special cleanup method to make sure hosts always get power back."""
733 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
734 hosts = afe.get_hosts(hostname=self.hostname)
735 if not hosts or not (self._RPM_OUTLET_CHANGED in
736 hosts[0].attributes):
737 return
738 logging.debug('This host has recently interacted with the RPM'
739 ' Infrastructure. Ensuring power is on.')
740 try:
741 self.power_on()
742 except rpm_client.RemotePowerException:
743 # If cleanup has completed but there was an issue with the RPM
744 # Infrastructure, log an error message rather than fail cleanup
745 logging.error('Failed to turn Power On for this host after '
746 'cleanup through the RPM Infrastructure.')
747 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
748 hostname=self.hostname)
749
750
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700751 def cleanup(self):
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700752 client_at = autotest.Autotest(self)
Richard Barnette82c35912012-11-20 10:09:10 -0800753 self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500754 try:
755 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
756 '_clear_login_prompt_state')
757 self.run('restart ui')
758 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
759 '_wait_for_login_prompt')
Alex Millerf4517962013-02-25 15:03:02 -0800760 except (error.AutotestRunError, error.AutoservRunError):
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500761 logging.warn('Unable to restart ui, rebooting device.')
762 # Since restarting the UI fails fall back to normal Autotest
763 # cleanup routines, i.e. reboot the machine.
764 super(SiteHost, self).cleanup()
Simran Basi5e6339a2013-03-21 11:34:32 -0700765 # Check if the rpm outlet was manipulated.
Simran Basid5e5e272012-09-24 15:23:59 -0700766 if self.has_power():
Simran Basi5e6339a2013-03-21 11:34:32 -0700767 self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700768
769
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700770 def reboot(self, **dargs):
771 """
772 This function reboots the site host. The more generic
773 RemoteHost.reboot() performs sync and sleeps for 5
774 seconds. This is not necessary for Chrome OS devices as the
775 sync should be finished in a short time during the reboot
776 command.
777 """
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800778 if 'reboot_cmd' not in dargs:
779 dargs['reboot_cmd'] = ('((reboot & sleep 10; reboot -f &)'
780 ' </dev/null >/dev/null 2>&1 &)')
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700781 # Enable fastsync to avoid running extra sync commands before reboot.
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800782 if 'fastsync' not in dargs:
783 dargs['fastsync'] = True
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700784 super(SiteHost, self).reboot(**dargs)
785
786
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700787 def verify_software(self):
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800788 """Verify working software on a Chrome OS system.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700789
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800790 Tests for the following conditions:
791 1. All conditions tested by the parent version of this
792 function.
793 2. Sufficient space in /mnt/stateful_partition.
Fang Deng6b05f5b2013-03-20 13:42:11 -0700794 3. Sufficient space in /mnt/stateful_partition/encrypted.
795 4. update_engine answers a simple status request over DBus.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700796
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700797 """
798 super(SiteHost, self).verify_software()
799 self.check_diskspace(
800 '/mnt/stateful_partition',
801 global_config.global_config.get_config_value(
Fang Deng6b05f5b2013-03-20 13:42:11 -0700802 'SERVER', 'gb_diskspace_required', type=float,
803 default=20.0))
804 self.check_diskspace(
805 '/mnt/stateful_partition/encrypted',
806 global_config.global_config.get_config_value(
807 'SERVER', 'gb_encrypted_diskspace_required', type=float,
808 default=0.1))
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800809 self.run('update_engine_client --status')
Scott Zawalskifbca4a92013-03-04 15:56:42 -0500810 # Makes sure python is present, loads and can use built in functions.
811 # We have seen cases where importing cPickle fails with undefined
812 # symbols in cPickle.so.
813 self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700814
815
Christopher Wileyd78249a2013-03-01 13:05:31 -0800816 def xmlrpc_connect(self, command, port, command_name=None,
817 ready_test_name=None, timeout_seconds=10):
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700818 """Connect to an XMLRPC server on the host.
819
820 The `command` argument should be a simple shell command that
821 starts an XMLRPC server on the given `port`. The command
822 must not daemonize, and must terminate cleanly on SIGTERM.
823 The command is started in the background on the host, and a
824 local XMLRPC client for the server is created and returned
825 to the caller.
826
827 Note that the process of creating an XMLRPC client makes no
828 attempt to connect to the remote server; the caller is
829 responsible for determining whether the server is running
830 correctly, and is ready to serve requests.
831
Christopher Wileyd78249a2013-03-01 13:05:31 -0800832 Optionally, the caller can pass ready_test_name, a string
833 containing the name of a method to call on the proxy. This
834 method should take no parameters and return successfully only
835 when the server is ready to process client requests. When
836 ready_test_name is set, xmlrpc_connect will block until the
837 proxy is ready, and throw a TestError if the server isn't
838 ready by timeout_seconds.
839
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700840 @param command Shell command to start the server.
841 @param port Port number on which the server is expected to
842 be serving.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800843 @param command_name String to use as input to `pkill` to
844 terminate the XMLRPC server on the host.
Christopher Wileyd78249a2013-03-01 13:05:31 -0800845 @param ready_test_name String containing the name of a
846 method defined on the XMLRPC server.
847 @param timeout_seconds Number of seconds to wait
848 for the server to become 'ready.' Will throw a
849 TestFail error if server is not ready in time.
850
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700851 """
852 self.xmlrpc_disconnect(port)
853
854 # Chrome OS on the target closes down most external ports
855 # for security. We could open the port, but doing that
856 # would conflict with security tests that check that only
857 # expected ports are open. So, to get to the port on the
858 # target we use an ssh tunnel.
859 local_port = utils.get_unused_port()
860 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
861 ssh_cmd = make_ssh_command(opts=tunnel_options)
862 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
863 logging.debug('Full tunnel command: %s', tunnel_cmd)
864 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
865 logging.debug('Started XMLRPC tunnel, local = %d'
866 ' remote = %d, pid = %d',
867 local_port, port, tunnel_proc.pid)
868
869 # Start the server on the host. Redirection in the command
870 # below is necessary, because 'ssh' won't terminate until
871 # background child processes close stdin, stdout, and
872 # stderr.
873 remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
874 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
875 logging.debug('Started XMLRPC server on host %s, pid = %s',
876 self.hostname, remote_pid)
877
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800878 self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700879 rpc_url = 'http://localhost:%d' % local_port
Christopher Wileyd78249a2013-03-01 13:05:31 -0800880 proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
881 if ready_test_name is not None:
J. Richard Barnette13eb7c02013-03-07 12:06:29 -0800882 # retry.retry logs each attempt; calculate delay_sec to
883 # keep log spam to a dull roar.
Christopher Wileyd78249a2013-03-01 13:05:31 -0800884 @retry.retry((socket.error, xmlrpclib.ProtocolError),
885 timeout_min=timeout_seconds/60.0,
J. Richard Barnette13eb7c02013-03-07 12:06:29 -0800886 delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
Christopher Wileyd78249a2013-03-01 13:05:31 -0800887 def ready_test():
888 """ Call proxy.ready_test_name(). """
889 getattr(proxy, ready_test_name)()
890 successful = False
891 try:
892 logging.info('Waiting %d seconds for XMLRPC server '
893 'to start.', timeout_seconds)
894 ready_test()
895 successful = True
896 except retry.TimeoutException:
897 raise error.TestError('Unable to start XMLRPC server after '
898 '%d seconds.' % timeout_seconds)
899 finally:
900 if not successful:
901 logging.error('Failed to start XMLRPC server.')
902 self.xmlrpc_disconnect(port)
903 logging.info('XMLRPC server started successfully.')
904 return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700905
906 def xmlrpc_disconnect(self, port):
907 """Disconnect from an XMLRPC server on the host.
908
909 Terminates the remote XMLRPC server previously started for
910 the given `port`. Also closes the local ssh tunnel created
911 for the connection to the host. This function does not
912 directly alter the state of a previously returned XMLRPC
913 client object; however disconnection will cause all
914 subsequent calls to methods on the object to fail.
915
916 This function does nothing if requested to disconnect a port
917 that was not previously connected via `self.xmlrpc_connect()`
918
919 @param port Port number passed to a previous call to
920 `xmlrpc_connect()`
921 """
922 if port not in self._xmlrpc_proxy_map:
923 return
924 entry = self._xmlrpc_proxy_map[port]
925 remote_name = entry[0]
926 tunnel_proc = entry[1]
927 if remote_name:
928 # We use 'pkill' to find our target process rather than
929 # a PID, because the host may have rebooted since
930 # connecting, and we don't want to kill an innocent
931 # process with the same PID.
932 #
933 # 'pkill' helpfully exits with status 1 if no target
934 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -0700935 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700936 # status.
937 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
938
939 if tunnel_proc.poll() is None:
940 tunnel_proc.terminate()
941 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
942 else:
943 logging.debug('Tunnel pid %d terminated early, status %d',
944 tunnel_proc.pid, tunnel_proc.returncode)
945 del self._xmlrpc_proxy_map[port]
946
947
948 def xmlrpc_disconnect_all(self):
949 """Disconnect all known XMLRPC proxy ports."""
950 for port in self._xmlrpc_proxy_map.keys():
951 self.xmlrpc_disconnect(port)
952
953
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800954 def _ping_check_status(self, status):
955 """Ping the host once, and return whether it has a given status.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700956
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800957 @param status Check the ping status against this value.
958 @return True iff `status` and the result of ping are the same
959 (i.e. both True or both False).
960
961 """
962 ping_val = utils.ping(self.hostname, tries=1, deadline=1)
963 return not (status ^ (ping_val == 0))
964
965 def _ping_wait_for_status(self, status, timeout):
966 """Wait for the host to have a given status (UP or DOWN).
967
968 Status is checked by polling. Polling will not last longer
969 than the number of seconds in `timeout`. The polling
970 interval will be long enough that only approximately
971 _PING_WAIT_COUNT polling cycles will be executed, subject
972 to a maximum interval of about one minute.
973
974 @param status Waiting will stop immediately if `ping` of the
975 host returns this status.
976 @param timeout Poll for at most this many seconds.
977 @return True iff the host status from `ping` matched the
978 requested status at the time of return.
979
980 """
981 # _ping_check_status() takes about 1 second, hence the
982 # "- 1" in the formula below.
983 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
984 end_time = time.time() + timeout
985 while time.time() <= end_time:
986 if self._ping_check_status(status):
987 return True
988 if poll_interval > 0:
989 time.sleep(poll_interval)
990
991 # The last thing we did was sleep(poll_interval), so it may
992 # have been too long since the last `ping`. Check one more
993 # time, just to be sure.
994 return self._ping_check_status(status)
995
996 def ping_wait_up(self, timeout):
997 """Wait for the host to respond to `ping`.
998
999 N.B. This method is not a reliable substitute for
1000 `wait_up()`, because a host that responds to ping will not
1001 necessarily respond to ssh. This method should only be used
1002 if the target DUT can be considered functional even if it
1003 can't be reached via ssh.
1004
1005 @param timeout Minimum time to allow before declaring the
1006 host to be non-responsive.
1007 @return True iff the host answered to ping before the timeout.
1008
1009 """
1010 return self._ping_wait_for_status(self._PING_STATUS_UP, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001011
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001012 def ping_wait_down(self, timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001013 """Wait until the host no longer responds to `ping`.
1014
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -08001015 This function can be used as a slightly faster version of
1016 `wait_down()`, by avoiding potentially long ssh timeouts.
1017
1018 @param timeout Minimum time to allow for the host to become
1019 non-responsive.
1020 @return True iff the host quit answering ping before the
1021 timeout.
1022
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001023 """
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -08001024 return self._ping_wait_for_status(self._PING_STATUS_DOWN, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001025
1026 def test_wait_for_sleep(self):
1027 """Wait for the client to enter low-power sleep mode.
1028
1029 The test for "is asleep" can't distinguish a system that is
1030 powered off; to confirm that the unit was asleep, it is
1031 necessary to force resume, and then call
1032 `test_wait_for_resume()`.
1033
1034 This function is expected to be called from a test as part
1035 of a sequence like the following:
1036
1037 ~~~~~~~~
1038 boot_id = host.get_boot_id()
1039 # trigger sleep on the host
1040 host.test_wait_for_sleep()
1041 # trigger resume on the host
1042 host.test_wait_for_resume(boot_id)
1043 ~~~~~~~~
1044
1045 @exception TestFail The host did not go to sleep within
1046 the allowed time.
1047 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001048 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001049 raise error.TestFail(
1050 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001051 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001052
1053
1054 def test_wait_for_resume(self, old_boot_id):
1055 """Wait for the client to resume from low-power sleep mode.
1056
1057 The `old_boot_id` parameter should be the value from
1058 `get_boot_id()` obtained prior to entering sleep mode. A
1059 `TestFail` exception is raised if the boot id changes.
1060
1061 See @ref test_wait_for_sleep for more on this function's
1062 usage.
1063
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001064 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001065 target host went to sleep.
1066
1067 @exception TestFail The host did not respond within the
1068 allowed time.
1069 @exception TestFail The host responded, but the boot id test
1070 indicated a reboot rather than a sleep
1071 cycle.
1072 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001073 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001074 raise error.TestFail(
1075 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001076 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001077 else:
1078 new_boot_id = self.get_boot_id()
1079 if new_boot_id != old_boot_id:
1080 raise error.TestFail(
1081 'client rebooted, but sleep was expected'
1082 ' (old boot %s, new boot %s)'
1083 % (old_boot_id, new_boot_id))
1084
1085
1086 def test_wait_for_shutdown(self):
1087 """Wait for the client to shut down.
1088
1089 The test for "has shut down" can't distinguish a system that
1090 is merely asleep; to confirm that the unit was down, it is
1091 necessary to force boot, and then call test_wait_for_boot().
1092
1093 This function is expected to be called from a test as part
1094 of a sequence like the following:
1095
1096 ~~~~~~~~
1097 boot_id = host.get_boot_id()
1098 # trigger shutdown on the host
1099 host.test_wait_for_shutdown()
1100 # trigger boot on the host
1101 host.test_wait_for_boot(boot_id)
1102 ~~~~~~~~
1103
1104 @exception TestFail The host did not shut down within the
1105 allowed time.
1106 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001107 if not self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001108 raise error.TestFail(
1109 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001110 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001111
1112
1113 def test_wait_for_boot(self, old_boot_id=None):
1114 """Wait for the client to boot from cold power.
1115
1116 The `old_boot_id` parameter should be the value from
1117 `get_boot_id()` obtained prior to shutting down. A
1118 `TestFail` exception is raised if the boot id does not
1119 change. The boot id test is omitted if `old_boot_id` is not
1120 specified.
1121
1122 See @ref test_wait_for_shutdown for more on this function's
1123 usage.
1124
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001125 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001126 shut down.
1127
1128 @exception TestFail The host did not respond within the
1129 allowed time.
1130 @exception TestFail The host responded, but the boot id test
1131 indicated that there was no reboot.
1132 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001133 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001134 raise error.TestFail(
1135 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001136 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001137 elif old_boot_id:
1138 if self.get_boot_id() == old_boot_id:
1139 raise error.TestFail(
1140 'client is back up, but did not reboot'
1141 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001142
1143
1144 @staticmethod
1145 def check_for_rpm_support(hostname):
1146 """For a given hostname, return whether or not it is powered by an RPM.
1147
1148 @return None if this host does not follows the defined naming format
1149 for RPM powered DUT's in the lab. If it does follow the format,
1150 it returns a regular expression MatchObject instead.
1151 """
Richard Barnette82c35912012-11-20 10:09:10 -08001152 return re.match(SiteHost._RPM_HOSTNAME_REGEX, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001153
1154
1155 def has_power(self):
1156 """For this host, return whether or not it is powered by an RPM.
1157
1158 @return True if this host is in the CROS lab and follows the defined
1159 naming format.
1160 """
1161 return SiteHost.check_for_rpm_support(self.hostname)
1162
1163
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001164 def _set_power(self, state, power_method):
1165 """Sets the power to the host via RPM, Servo or manual.
1166
1167 @param state Specifies which power state to set to DUT
1168 @param power_method Specifies which method of power control to
1169 use. By default "RPM" will be used. Valid values
1170 are the strings "RPM", "manual", "servoj10".
1171
1172 """
1173 ACCEPTABLE_STATES = ['ON', 'OFF']
1174
1175 if state.upper() not in ACCEPTABLE_STATES:
1176 raise error.TestError('State must be one of: %s.'
1177 % (ACCEPTABLE_STATES,))
1178
1179 if power_method == self.POWER_CONTROL_SERVO:
1180 logging.info('Setting servo port J10 to %s', state)
1181 self.servo.set('prtctl3_pwren', state.lower())
1182 time.sleep(self._USB_POWER_TIMEOUT)
1183 elif power_method == self.POWER_CONTROL_MANUAL:
1184 logging.info('You have %d seconds to set the AC power to %s.',
1185 self._POWER_CYCLE_TIMEOUT, state)
1186 time.sleep(self._POWER_CYCLE_TIMEOUT)
1187 else:
1188 if not self.has_power():
1189 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001190 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1191 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1192 hostname=self.hostname)
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001193 rpm_client.set_power(self.hostname, state.upper())
Simran Basid5e5e272012-09-24 15:23:59 -07001194
1195
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001196 def power_off(self, power_method=POWER_CONTROL_RPM):
1197 """Turn off power to this host via RPM, Servo or manual.
1198
1199 @param power_method Specifies which method of power control to
1200 use. By default "RPM" will be used. Valid values
1201 are the strings "RPM", "manual", "servoj10".
1202
1203 """
1204 self._set_power('OFF', power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001205
1206
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001207 def power_on(self, power_method=POWER_CONTROL_RPM):
1208 """Turn on power to this host via RPM, Servo or manual.
1209
1210 @param power_method Specifies which method of power control to
1211 use. By default "RPM" will be used. Valid values
1212 are the strings "RPM", "manual", "servoj10".
1213
1214 """
1215 self._set_power('ON', power_method)
1216
1217
1218 def power_cycle(self, power_method=POWER_CONTROL_RPM):
1219 """Cycle power to this host by turning it OFF, then ON.
1220
1221 @param power_method Specifies which method of power control to
1222 use. By default "RPM" will be used. Valid values
1223 are the strings "RPM", "manual", "servoj10".
1224
1225 """
1226 if power_method in (self.POWER_CONTROL_SERVO,
1227 self.POWER_CONTROL_MANUAL):
1228 self.power_off(power_method=power_method)
1229 time.sleep(self._POWER_CYCLE_TIMEOUT)
1230 self.power_on(power_method=power_method)
1231 else:
1232 rpm_client.set_power(self.hostname, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -07001233
1234
1235 def get_platform(self):
1236 """Determine the correct platform label for this host.
1237
1238 @returns a string representing this host's platform.
1239 """
1240 crossystem = utils.Crossystem(self)
1241 crossystem.init()
1242 # Extract fwid value and use the leading part as the platform id.
1243 # fwid generally follow the format of {platform}.{firmware version}
1244 # Example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
1245 platform = crossystem.fwid().split('.')[0].lower()
1246 # Newer platforms start with 'Google_' while the older ones do not.
1247 return platform.replace('google_', '')
1248
1249
Aviv Keshet74c89a92013-02-04 15:18:30 -08001250 @label_decorator()
Simran Basic6f1f7a2012-10-16 10:47:46 -07001251 def get_board(self):
1252 """Determine the correct board label for this host.
1253
1254 @returns a string representing this host's board.
1255 """
1256 release_info = utils.parse_cmd_output('cat /etc/lsb-release',
1257 run_method=self.run)
1258 board = release_info['CHROMEOS_RELEASE_BOARD']
1259 # Devices in the lab generally have the correct board name but our own
1260 # development devices have {board_name}-signed-{key_type}. The board
1261 # name may also begin with 'x86-' which we need to keep.
Simran Basi833814b2013-01-29 13:13:43 -08001262 board_format_string = ds_constants.BOARD_PREFIX + '%s'
Simran Basic6f1f7a2012-10-16 10:47:46 -07001263 if 'x86' not in board:
Simran Basi833814b2013-01-29 13:13:43 -08001264 return board_format_string % board.split('-')[0]
1265 return board_format_string % '-'.join(board.split('-')[0:2])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001266
1267
Aviv Keshet74c89a92013-02-04 15:18:30 -08001268 @label_decorator('lightsensor')
Simran Basic6f1f7a2012-10-16 10:47:46 -07001269 def has_lightsensor(self):
1270 """Determine the correct board label for this host.
1271
1272 @returns the string 'lightsensor' if this host has a lightsensor or
1273 None if it does not.
1274 """
1275 search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
Richard Barnette82c35912012-11-20 10:09:10 -08001276 self._LIGHTSENSOR_SEARCH_DIR, '|'.join(self._LIGHTSENSOR_FILES))
Simran Basic6f1f7a2012-10-16 10:47:46 -07001277 try:
1278 # Run the search cmd following the symlinks. Stderr_tee is set to
1279 # None as there can be a symlink loop, but the command will still
1280 # execute correctly with a few messages printed to stderr.
1281 self.run(search_cmd, stdout_tee=None, stderr_tee=None)
1282 return 'lightsensor'
1283 except error.AutoservRunError:
1284 # egrep exited with a return code of 1 meaning none of the possible
1285 # lightsensor files existed.
1286 return None
1287
1288
Aviv Keshet74c89a92013-02-04 15:18:30 -08001289 @label_decorator('bluetooth')
Simran Basic6f1f7a2012-10-16 10:47:46 -07001290 def has_bluetooth(self):
1291 """Determine the correct board label for this host.
1292
1293 @returns the string 'bluetooth' if this host has bluetooth or
1294 None if it does not.
1295 """
1296 try:
1297 self.run('test -d /sys/class/bluetooth/hci0')
1298 # test exited with a return code of 0.
1299 return 'bluetooth'
1300 except error.AutoservRunError:
1301 # test exited with a return code 1 meaning the directory did not
1302 # exist.
1303 return None
1304
1305
1306 def get_labels(self):
1307 """Return a list of labels for this given host.
1308
1309 This is the main way to retrieve all the automatic labels for a host
1310 as it will run through all the currently implemented label functions.
1311 """
1312 labels = []
Richard Barnette82c35912012-11-20 10:09:10 -08001313 for label_function in self._LABEL_FUNCTIONS:
Simran Basic6f1f7a2012-10-16 10:47:46 -07001314 label = label_function(self)
1315 if label:
1316 labels.append(label)
1317 return labels