blob: b8d2bb2998fde7bbdd7f036d2e4596629552ff4a [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
J. Richard Barnette1d78b012012-05-15 13:56:30 -07006import logging
Dan Shi0f466e82013-02-22 15:44:58 -08007import os
Simran Basid5e5e272012-09-24 15:23:59 -07008import re
Christopher Wileyd78249a2013-03-01 13:05:31 -08009import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070010import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070011import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070012import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070013
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080015from autotest_lib.client.common_lib import error
16from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080018from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080019from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080020from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070022from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import site_host_attributes
Scott Zawalski89c44dd2013-02-26 09:28:02 -050024from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070025from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080026from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070027from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080028from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070029
Richard Barnette82c35912012-11-20 10:09:10 -080030# Importing frontend.afe.models requires a full Autotest
31# installation (with the Django modules), not just the source
32# repository. Most developers won't have the full installation, so
33# the imports below will fail for them.
34#
35# The fix is to catch import exceptions, and set `models` to `None`
36# on failure. This has the side effect that
37# SiteHost._get_board_from_afe() will fail: That will manifest as
38# failures during Repair jobs leaving the DUT as "Repair Failed".
39# In practice, you can't test Repair jobs without a full
40# installation, so that kind of failure isn't expected.
try:
    # pylint: disable=W0611
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:  # pylint: disable=W0703
    # Any failure to load the Django-backed models (most commonly an
    # ImportError on a source-only checkout) degrades to `models = None`.
    # Catching Exception rather than using a bare `except:` keeps
    # KeyboardInterrupt and SystemExit from being swallowed here.
    models = None
47
Simran Basid5e5e272012-09-24 15:23:59 -070048
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080049def _make_servo_hostname(hostname):
50 host_parts = hostname.split('.')
51 host_parts[0] = host_parts[0] + '-servo'
52 return '.'.join(host_parts)
53
54
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    Creating the Servo is best-effort: a failure is logged and
    `None` is returned rather than propagating the exception.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None` if
            the servo host is not in the lab zone or the Servo could
            not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception as e:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        # The failure is still logged so it can be diagnosed.
        logging.warning('Ignoring failure to set up servo on %s: %s',
                        servo_host, e)
        return None
77
78
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to reach Chrome OS devices.

    Overrides the default make_ssh_command with a fixed set of `-o`
    options tuned for Chrome OS:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null; we
        don't care about the keys.  Host keys change with every new
        installation, so saving them wastes memory/space.
      - BatchMode=yes.
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.
      - ServerAliveInterval=180; causes SSH to ping the connection
        every 180 seconds.  In conjunction with ServerAliveCountMax
        this ensures that if the connection dies, Autotest will bail
        out quickly.  60 seconds was tried originally, but produced
        frequent job ABORTS where the test completed successfully.
      - ServerAliveCountMax=3; consistency with remote_access.sh.
      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.
      - Protocol=2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return the ssh command line as a single string.
    """
    fixed_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ' '.join('-o ' + option for option in fixed_options)
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_flags, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700115
116
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800117
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a label detecting function.

    The returned decorator appends the decorated function to
    |label_function_list| and, when both |label| and |label_list| are
    supplied, appends |label| to |label_list|.  The decorated function
    itself is returned unchanged.

    @param label_function_list: List of label detecting functions to add
                                decorated function to.
    @param label_list: List of detectable labels to add detectable labels
                       to. (Default: None)
    @param label: Label string that is detectable by this detection
                  function. (Default: None)
    @return the decorator described above.
    """
    def _register(detector):
        """Record |detector| (and its label, if any) and hand it back.

        @param detector: The function to be added as a detector.
        """
        label_function_list.append(detector)
        if label_list is None or not label:
            return detector
        label_list.append(label)
        return detector

    return _register
136
137
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700138class SiteHost(remote.RemoteHost):
139 """Chromium OS specific subclass of Host."""
140
141 _parser = autoserv_parser.autoserv_parser
142
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800143 # Time to wait for new kernel to be marked successful after
144 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700145 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700146
Richard Barnette03a0c132012-11-05 12:40:35 -0800147 # Timeout values (in seconds) associated with various Chrome OS
148 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700149 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800150 # In general, a good rule of thumb is that the timeout can be up
151 # to twice the typical measured value on the slowest platform.
152 # The times here have not necessarily been empirically tested to
153 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700154 #
155 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800156 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
157 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700158 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800159 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800160 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700161 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800162 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800163 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800164 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700165 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
Richard Barnette03a0c132012-11-05 12:40:35 -0800166 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700167
168 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800169 RESUME_TIMEOUT = 10
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700170 BOOT_TIMEOUT = 45
171 USB_BOOT_TIMEOUT = 150
172 SHUTDOWN_TIMEOUT = 5
173 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
Richard Barnette03a0c132012-11-05 12:40:35 -0800174 _INSTALL_TIMEOUT = 240
175
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800176 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
177 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
178 _USB_POWER_TIMEOUT = 5
179 _POWER_CYCLE_TIMEOUT = 10
180
Richard Barnette03a0c132012-11-05 12:40:35 -0800181 _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
182 '%(board)s_test_image.bin')
183
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800184
Richard Barnette82c35912012-11-20 10:09:10 -0800185 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
186 'rpm_recovery_boards', type=str).split(',')
187
188 _MAX_POWER_CYCLE_ATTEMPTS = 6
189 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
190 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
191 'host[0-9]+')
192 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
193 'in_illuminance0_raw',
194 'illuminance0_input']
195 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
196 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800197 _DETECTABLE_LABELS = []
198 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
199 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700200
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800201 # Constants used in ping_wait_up() and ping_wait_down().
202 #
203 # _PING_WAIT_COUNT is the approximate number of polling
204 # cycles to use when waiting for a host state change.
205 #
206 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
207 # for arguments to the internal _ping_wait_for_status()
208 # method.
209 _PING_WAIT_COUNT = 40
210 _PING_STATUS_DOWN = False
211 _PING_STATUS_UP = True
212
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800213 # Allowed values for the power_method argument.
214
215 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
216 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
217 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
218 POWER_CONTROL_RPM = 'RPM'
219 POWER_CONTROL_SERVO = 'servoj10'
220 POWER_CONTROL_MANUAL = 'manual'
221
222 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
223 POWER_CONTROL_SERVO,
224 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800225
Simran Basi5e6339a2013-03-21 11:34:32 -0700226 _RPM_OUTLET_CHANGED = 'outlet_changed'
227
J. Richard Barnette964fba02012-10-24 17:34:29 -0700228 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800229 def get_servo_arguments(args_dict):
230 """Extract servo options from `args_dict` and return the result.
231
232 Take the provided dictionary of argument options and return
233 a subset that represent standard arguments needed to
234 construct a servo object for a host. The intent is to
235 provide standard argument processing from run_remote_tests
236 for tests that require a servo to operate.
237
238 Recommended usage:
239 ~~~~~~~~
240 args_dict = utils.args_to_dict(args)
241 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
242 host = hosts.create_host(machine, servo_args=servo_args)
243 ~~~~~~~~
244
245 @param args_dict Dictionary from which to extract the servo
246 arguments.
247 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700248 servo_args = {}
249 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800250 if arg in args_dict:
251 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700252 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700253
J. Richard Barnette964fba02012-10-24 17:34:29 -0700254
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    |self.servo| is chosen as follows:  a servo found for the host
    by _get_lab_servo() always wins.  Otherwise, if the caller passed
    `servo_args` (settings for `servo_host`, `servo_port`, or both,
    typically from a control file that requires servo features), a
    Servo is built from them.  If neither case applies,
    `self.servo` is left as whatever false value the lab lookup
    returned.

    @param hostname: name of the host being initialized.
    @param servo_args: optional dictionary of keyword arguments for
                       constructing a Servo.
    """
    super(SiteHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env holds environment variable settings exported for
    # commands run on the host.  LIBC_FATAL_STDERR_ can be useful
    # for diagnosing certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._xmlrpc_proxy_map = {}
    self.servo = _get_lab_servo(hostname)
    if self.servo or servo_args is None:
        return
    self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700278
279
def get_repair_image_name(self):
    """Generate an image name from variables in the global config.

    The name is produced by formatting CROS.stable_build_pattern
    with this host's board and CROS.stable_cros_version.

    @returns the formatted image name string.
    @raises error.AutoservError if the host's board is `None`.
    """
    config = global_config.global_config
    stable_version = config.get_config_value('CROS', 'stable_cros_version')
    build_pattern = config.get_config_value('CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
295
296
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing if no host record exists for this hostname.
    """
    try:
        host_record = models.Host.objects.get(hostname=self.hostname)
    except models.Host.DoesNotExist:
        return

    # Detach this host from every label carrying the version prefix.
    for host_label in host_record.labels.iterator():
        if host_label.name.startswith(ds_constants.VERSION_PREFIX):
            host_label.host_set.remove(host_record)

    host_record.set_or_delete_attribute('job_repo_url', None)
311
312
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing if no host record exists for this hostname.  The
    version label is looked up by name and created when missing.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0
    """
    try:
        host_record = models.Host.objects.get(hostname=self.hostname)
    except models.Host.DoesNotExist:
        return

    version_label_name = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    try:
        version_label = models.Label.objects.get(name=version_label_name)
    except models.Label.DoesNotExist:
        version_label = models.Label.objects.create(name=version_label_name)
    host_record.labels.add(version_label)

    package_url = tools.get_package_url(devserver_url, image_name)
    host_record.set_or_delete_attribute('job_repo_url', package_url)
332
333
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage. It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    NOTE: stateful update is currently disabled.  The method logs a
    message and returns False immediately; everything after that
    early return is unreachable and is kept for when the feature is
    re-enabled.

    @param update_url: url of the image.  Not used by the body.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update
        (currently always False, see NOTE above).

    """
    # Stateful update is disabled until lsb-release has rc build info.
    logging.info('Stateful update only is disabled.')
    return False
    # ---- Unreachable while the early return above is in place. ----
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False
    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    for folder in folders_to_check:
        touch_path = os.path.join(folder, test_file)
        self.run('touch %s' % touch_path)

    # Update only the stateful partition (update_root=False).
    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=60, wait=True)
    # NOTE(review): the command ends with `echo $?`, so the shell's exit
    # status is presumably that of `echo`, not of `test`; confirm that
    # the `exit_status == 1` comparison below can ever be true, and
    # which outcome (marker file present or absent) is meant to signal
    # failure.
    check_file_cmd = 'test -f %s; echo $?'
    for folder in folders_to_check:
        test_file_path = os.path.join(folder, test_file)
        result = self.run(check_file_cmd % test_file_path,
                          ignore_status=True)
        if result.exit_status == 1:
            return False
    return True
381
382
def _post_update_processing(self, updater, inactive_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    Marks the DUT as a lab machine, starts autoreboot, then checks
    that the expected version is running, that the previously
    inactive kernel (if given) is now active, and, for hosts with
    Chrome OS firmware, that the new kernel gets marked successful
    within _KERNEL_UPDATE_TIMEOUT seconds.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param inactive_kernel: kernel state of inactive kernel before reboot.
    @raises autoupdater.ChromiumOSError if any of the checks fails.

    """
    # Leave a marker that distinguishes this image from other test
    # images.
    self.run('touch %s' % self._LAB_MACHINE_FILE)

    # The autoreboot script has to be kicked off by hand because
    # _LAB_MACHINE_FILE was missing on the first boot.
    self.run('start autoreboot')

    # Following the reboot, verify the correct version.
    version_matches = updater.check_version()
    if not version_matches:
        # Dump the partition table and crossystem to make the
        # rollback easier to debug.
        logging.debug('Dumping partition table.')
        self.run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self.run('crossystem --all')
        logging.error('Expected Chromium OS version: %s. '
                      'Found Chromium OS %s',
                      updater.update_version, updater.get_build_id())
        raise autoupdater.ChromiumOSError('Updater failed on host %s' %
                                          self.hostname)

    # The first element of the kernel state is the active kernel.
    new_active_kernel, _ = updater.get_kernel_state()

    # The previously inactive kernel must now be the active one.
    if inactive_kernel and new_active_kernel != inactive_kernel:
        raise autoupdater.ChromiumOSError(
                'Update failed. New kernel partition is not active after'
                ' boot.')

    try:
        host_attributes = site_host_attributes.HostAttributes(self.hostname)
    except models.Host.DoesNotExist:
        host_attributes = None
    if not (host_attributes and host_attributes.has_chromeos_firmware):
        return

    def _new_kernel_marked_good():
        """Check for tries == 0 and success on the new active kernel."""
        return (updater.get_kernel_tries(new_active_kernel) == 0
                and updater.get_kernel_success(new_active_kernel))

    # Wait until tries == 0 and success, or until timeout.
    utils.poll_for_condition(
            _new_kernel_marked_good,
            exception=autoupdater.ChromiumOSError(
                    'Update failed. Timed out waiting for system to mark'
                    ' new kernel as successful.'),
            timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
434
435
def _stage_build_and_return_update_url(self, image_name):
    """Stage a build on a devserver and return the update_url.

    The download is triggered with synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    logging.info('Staging requested build: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    image_server.trigger_download(image_name, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
447
448
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).

    The update URL is chosen as follows when `update_url` is not
    given: the image from the autoserv parser options is used if set
    (staged on a devserver unless it starts with http://); otherwise,
    in repair mode, the stable repair image is staged; otherwise the
    call fails.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None and repair is True we will install the
            stable image listed in global_config under
            CROS.stable_cros_version.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @raises autoupdater.ChromiumOSError

    """
    if not update_url and self._parser.options.image:
        requested_build = self._parser.options.image
        if requested_build.startswith('http://'):
            update_url = requested_build
        else:
            # Try to stage any build that does not start with http:// on
            # the devservers defined in global_config.ini.
            update_url = self._stage_build_and_return_update_url(
                    requested_build)
    elif not update_url and not repair:
        raise autoupdater.ChromiumOSError(
                'Update failed. No update URL provided.')
    elif not update_url and repair:
        # Repair mode without an explicit URL: stage the stable
        # repair image.
        update_url = self._stage_build_and_return_update_url(
                self.get_repair_image_name())

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=60, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    try:
        updated = self._try_stateful_update(update_url, force_update,
                                            updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        # A failed stateful update is not fatal: log it and fall
        # through to the full update below.
        logging.exception(e)
        logging.warn('Failed to stateful update DUT, force to update.')

    # Stays None unless a full update runs; passed on to
    # _post_update_processing().
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=60, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('touch /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Capture the update_engine log in the job output before
            # rebooting.
            update_engine_log = '/var/log/update_engine.log'
            logging.info('Dumping %s', update_engine_log)
            self.run('cat %s' % update_engine_log)
            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=60, wait=True)

    if updated:
        # Verify the update, then record the new version label and
        # job_repo_url for the host.
        self._post_update_processing(updater, inactive_kernel)
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    # Clean up any old autotest directories which may be lying around.
    for path in global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list):
        self.run('rm -rf ' + path)
561
562
def _get_label_from_afe(self, label_prefix):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix><value>
    and returns the "<value>" part of the label.  `None` is returned
    if there is not a single, unique label matching the prefix.

    The host record itself is still fetched with get(), so a missing
    host record raises rather than returning `None`.

    @param label_prefix: prefix (e.g. "board:") identifying the label.
    @returns the part of the label after the prefix, or `None`.
    """
    host_model = models.Host.objects.get(hostname=self.hostname)
    # QuerySet.get() raises when zero or several labels match instead
    # of returning a false value, so use filter() and count the
    # matches to deliver the documented `None` result.
    matching_labels = list(
            host_model.labels.filter(name__startswith=label_prefix))
    if len(matching_labels) != 1:
        return None
    return matching_labels[0].name.split(label_prefix, 1)[1]
577
578
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Delegates to _get_label_from_afe() with the board label prefix,
    i.e. for a host label of the form "board:<board>" the "<board>"
    part is returned.

    @returns whatever _get_label_from_afe() yields for the board
             prefix.
    """
    board_prefix = ds_constants.BOARD_PREFIX
    return self._get_label_from_afe(board_prefix)
589
590
def get_build(self):
    """Retrieve the current build for this Host from the AFE.

    Delegates to _get_label_from_afe() with the version label prefix
    to determine the build from this host's labels.

    @returns whatever _get_label_from_afe() yields for the version
             prefix.
    """
    version_prefix = ds_constants.VERSION_PREFIX
    return self._get_label_from_afe(version_prefix)
Richard Barnette82c35912012-11-20 10:09:10 -0800600
601
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    If the host is up, try installing the DUT with a stable
    "repair" version of Chrome OS as defined in the global_config
    under CROS.stable_cros_version.

    @returns True if successful, False if the host is not up or
             machine_install() raised autoupdater.ChromiumOSError.

    """
    host_is_reachable = self.is_up()
    if not host_is_reachable:
        return False

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        return False
    else:
        return True
624
625
def _servo_repair(self, board):
    """Attempt to repair this host using an attached Servo.

    Re-install the OS on the DUT by 1) installing a test image
    on a USB storage device attached to the Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image.

    @param board: board name used to locate the servo test image.
    @raises error.AutoservError if the DUT fails to come up from USB
            or fails to come up again after the install.

    """
    devserver_url = dev_server.ImageServer.devserver_url_for_servo(board)
    image_path = self._DEFAULT_SERVO_URL_FORMAT % {'board': board}
    self.servo.install_recovery_image(devserver_url + image_path)

    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservError('DUT failed to boot from USB'
                                  ' after %d seconds' %
                                  self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes',
             timeout=self._INSTALL_TIMEOUT)

    # Long press, switch the USB mux selection to
    # 'servo_sees_usbkey', then short press before waiting for the
    # DUT to come up again.
    self.servo.power_long_press()
    self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
    self.servo.power_short_press()

    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
652
653
Richard Barnette82c35912012-11-20 10:09:10 -0800654 def _powercycle_to_repair(self):
655 """Utilize the RPM Infrastructure to bring the host back up.
656
657 If the host is not up/repaired after the first powercycle we utilize
658 auto fallback to the last good install by powercycling and rebooting the
659 host 6 times.
660 """
661 logging.info('Attempting repair via RPM powercycle.')
662 failed_cycles = 0
663 self.power_cycle()
664 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
665 failed_cycles += 1
666 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
667 raise error.AutoservError('Powercycled host %s %d times; '
668 'device did not come back online.' %
669 (self.hostname, failed_cycles))
670 self.power_cycle()
671 if failed_cycles == 0:
672 logging.info('Powercycling was successful first time.')
673 else:
674 logging.info('Powercycling was successful after %d failures.',
675 failed_cycles)
676
677
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If that fails and there's a servo for the DUT that
         supports recovery, try to re-install via the servo.
      3. Otherwise, if the DUT can be power-cycled via RPM and its
         board is in _RPM_RECOVERY_BOARDS, try to repair by
         power-cycling.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.

    @exception AutoservError The host has no board label in the
                             AFE, or auto-update failed and neither
                             servo nor RPM repair is available.

    """
    host_board = self._get_board_from_afe()
    if host_board is None:
        logging.error('host %s has no board; failing repair',
                      self.hostname)
        # A bare `raise` here has no active exception to re-raise and
        # would surface as an unrelated TypeError/RuntimeError, so
        # raise an explicit repair failure instead.
        raise error.AutoservError(
                'host %s has no board; failing repair' % self.hostname)

    if not self._install_repair():
        # TODO(scottz): All repair pathways should be
        # executed until we've exhausted all options. Below
        # we favor servo over powercycle when we really
        # should be falling back to power if servo fails.
        if self.servo and self.servo.recovery_supported():
            self._servo_repair(host_board)
        elif (self.has_power() and
              host_board in self._RPM_RECOVERY_BOARDS):
            self._powercycle_to_repair()
        else:
            logging.error('host %s has no servo and no RPM control; '
                          'failing repair', self.hostname)
            raise error.AutoservError(
                    'host %s has no servo and no RPM control; '
                    'failing repair' % self.hostname)
    self.verify()
Richard Barnette82c35912012-11-20 10:09:10 -0800721
722
def close(self):
    """Close this host and drop all of its XMLRPC connections.

    The parent class `close()` runs first; afterwards every
    connection recorded by `xmlrpc_connect()` is torn down via
    `xmlrpc_disconnect_all()`.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
726
727
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Nothing is done unless the AFE knows this host and the host
    carries the _RPM_OUTLET_CHANGED attribute. In that case power is
    switched on and the attribute is reset to None afterwards, whether
    or not switching the power on succeeded.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    outlet_changed = (bool(hosts) and
                      self._RPM_OUTLET_CHANGED in hosts[0].attributes)
    if not outlet_changed:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # Cleanup itself has completed; a problem in the RPM
        # Infrastructure is only logged so that it does not turn
        # into a cleanup failure.
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
746
747
def cleanup(self):
    """Clean up the host by restarting the UI instead of rebooting.

    The cleanup-logs pause file is removed, the login prompt state
    is cleared, the UI is restarted and the login prompt is awaited.
    If any of the UI steps fails, the parent class cleanup is run
    instead. Finally, for RPM-powered hosts, `_cleanup_poweron()` is
    invoked.
    """
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warn('Unable to restart ui, rebooting device.')
        # Restarting the UI failed, so fall back to the normal
        # Autotest cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700765
766
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds. This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.

    @param dargs Keyword arguments forwarded to the parent class
                 `reboot()`. `reboot_cmd` and `fastsync` are given
                 Chrome OS specific defaults when the caller does
                 not supply them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
782
783
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
      1. All conditions tested by the parent version of this
         function.
      2. Sufficient space in /mnt/stateful_partition.
      3. Sufficient space in /mnt/stateful_partition/encrypted.
      4. update_engine answers a simple status request.
      5. python on the host can import cPickle.

    The required disk space is read from the SERVER section of the
    global config, with defaults of 20.0 and 0.1 respectively.

    """
    super(SiteHost, self).verify_software()

    config = global_config.global_config
    space_checks = (
        ('/mnt/stateful_partition', 'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for path, config_key, default_gb in space_checks:
        required_gb = config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(path, required_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700811
812
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`. The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy. This
    method should take no parameters and return successfully only
    when the server is ready to process client requests. When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready, and throw a TestError if the server isn't
    ready by timeout_seconds.

    Any connection previously made for `port` is disconnected
    first.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.' Will throw a
        TestError if server is not ready in time.

    @returns An `xmlrpclib.ServerProxy` that reaches the server
             through a local ssh tunnel.

    @exception TestError `ready_test_name` was given and the server
                         was not ready within `timeout_seconds`.

    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security. We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open. So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host. Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except Exception:
        # The tunnel is not recorded in _xmlrpc_proxy_map yet, so
        # xmlrpc_disconnect() could never find it; stop it here
        # rather than leaking the ssh process.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
        raise
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error, xmlrpclib.ProtocolError),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        except retry.TimeoutException:
            raise error.TestError('Unable to start XMLRPC server after '
                                  '%d seconds.' % timeout_seconds)
        finally:
            # Runs for any failure of the readiness test, not just a
            # timeout, so the tunnel and server are always cleaned up.
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.xmlrpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700902
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`, and closes the local ssh tunnel created for
    the connection. A previously returned XMLRPC client object is
    not altered directly, but all subsequent calls to methods on
    it will fail once the connection is gone.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    try:
        remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]
    except KeyError:
        return

    if remote_name:
        # The target process is found with 'pkill' rather than by
        # PID: the host may have rebooted since connecting, and an
        # innocent process could now own the same PID.
        #
        # 'pkill' exits with status 1 if no target process is
        # found, which would make run() throw an exception. We
        # don't want that, so the status is ignored.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    still_running = tunnel_proc.poll() is None
    if still_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
943
944
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports.

    Every port recorded in `_xmlrpc_proxy_map` is passed to
    `xmlrpc_disconnect()`.
    """
    # xmlrpc_disconnect() deletes entries from the map, so iterate
    # over an explicit snapshot of the ports rather than relying on
    # keys() returning a copy.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
949
950
def _ping_check_status(self, status):
    """Ping the host once, and compare the outcome with `status`.

    A ping exit value of 0 counts as the host having answered.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_val = utils.ping(self.hostname, tries=1, deadline=1)
    host_answered = (ping_val == 0)
    return not (status ^ host_answered)
961
962 def _ping_wait_for_status(self, status, timeout):
963 """Wait for the host to have a given status (UP or DOWN).
964
965 Status is checked by polling. Polling will not last longer
966 than the number of seconds in `timeout`. The polling
967 interval will be long enough that only approximately
968 _PING_WAIT_COUNT polling cycles will be executed, subject
969 to a maximum interval of about one minute.
970
971 @param status Waiting will stop immediately if `ping` of the
972 host returns this status.
973 @param timeout Poll for at most this many seconds.
974 @return True iff the host status from `ping` matched the
975 requested status at the time of return.
976
977 """
978 # _ping_check_status() takes about 1 second, hence the
979 # "- 1" in the formula below.
980 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
981 end_time = time.time() + timeout
982 while time.time() <= end_time:
983 if self._ping_check_status(status):
984 return True
985 if poll_interval > 0:
986 time.sleep(poll_interval)
987
988 # The last thing we did was sleep(poll_interval), so it may
989 # have been too long since the last `ping`. Check one more
990 # time, just to be sure.
991 return self._ping_check_status(status)
992
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B. This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh. Use it only when the target DUT
    can be considered functional even if it can't be reached via
    ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001008
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001022
1023 def test_wait_for_sleep(self):
1024 """Wait for the client to enter low-power sleep mode.
1025
1026 The test for "is asleep" can't distinguish a system that is
1027 powered off; to confirm that the unit was asleep, it is
1028 necessary to force resume, and then call
1029 `test_wait_for_resume()`.
1030
1031 This function is expected to be called from a test as part
1032 of a sequence like the following:
1033
1034 ~~~~~~~~
1035 boot_id = host.get_boot_id()
1036 # trigger sleep on the host
1037 host.test_wait_for_sleep()
1038 # trigger resume on the host
1039 host.test_wait_for_resume(boot_id)
1040 ~~~~~~~~
1041
1042 @exception TestFail The host did not go to sleep within
1043 the allowed time.
1044 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001045 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001046 raise error.TestFail(
1047 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001048 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001049
1050
1051 def test_wait_for_resume(self, old_boot_id):
1052 """Wait for the client to resume from low-power sleep mode.
1053
1054 The `old_boot_id` parameter should be the value from
1055 `get_boot_id()` obtained prior to entering sleep mode. A
1056 `TestFail` exception is raised if the boot id changes.
1057
1058 See @ref test_wait_for_sleep for more on this function's
1059 usage.
1060
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001061 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001062 target host went to sleep.
1063
1064 @exception TestFail The host did not respond within the
1065 allowed time.
1066 @exception TestFail The host responded, but the boot id test
1067 indicated a reboot rather than a sleep
1068 cycle.
1069 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001070 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001071 raise error.TestFail(
1072 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001073 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001074 else:
1075 new_boot_id = self.get_boot_id()
1076 if new_boot_id != old_boot_id:
1077 raise error.TestFail(
1078 'client rebooted, but sleep was expected'
1079 ' (old boot %s, new boot %s)'
1080 % (old_boot_id, new_boot_id))
1081
1082
1083 def test_wait_for_shutdown(self):
1084 """Wait for the client to shut down.
1085
1086 The test for "has shut down" can't distinguish a system that
1087 is merely asleep; to confirm that the unit was down, it is
1088 necessary to force boot, and then call test_wait_for_boot().
1089
1090 This function is expected to be called from a test as part
1091 of a sequence like the following:
1092
1093 ~~~~~~~~
1094 boot_id = host.get_boot_id()
1095 # trigger shutdown on the host
1096 host.test_wait_for_shutdown()
1097 # trigger boot on the host
1098 host.test_wait_for_boot(boot_id)
1099 ~~~~~~~~
1100
1101 @exception TestFail The host did not shut down within the
1102 allowed time.
1103 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001104 if not self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001105 raise error.TestFail(
1106 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001107 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001108
1109
1110 def test_wait_for_boot(self, old_boot_id=None):
1111 """Wait for the client to boot from cold power.
1112
1113 The `old_boot_id` parameter should be the value from
1114 `get_boot_id()` obtained prior to shutting down. A
1115 `TestFail` exception is raised if the boot id does not
1116 change. The boot id test is omitted if `old_boot_id` is not
1117 specified.
1118
1119 See @ref test_wait_for_shutdown for more on this function's
1120 usage.
1121
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001122 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001123 shut down.
1124
1125 @exception TestFail The host did not respond within the
1126 allowed time.
1127 @exception TestFail The host responded, but the boot id test
1128 indicated that there was no reboot.
1129 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001130 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001131 raise error.TestFail(
1132 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001133 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001134 elif old_boot_id:
1135 if self.get_boot_id() == old_boot_id:
1136 raise error.TestFail(
1137 'client is back up, but did not reboot'
1138 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001139
1140
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    The hostname is matched against `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname Name of the host to check.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab. If it does follow the format,
            a regular expression MatchObject is returned instead.
    """
    rpm_hostname_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001150
1151
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return The result of `check_for_rpm_support()` for this host's
            hostname: a true value (a MatchObject) if the hostname
            follows the defined naming format, None otherwise.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
1159
1160
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001161 def _set_power(self, state, power_method):
1162 """Sets the power to the host via RPM, Servo or manual.
1163
1164 @param state Specifies which power state to set to DUT
1165 @param power_method Specifies which method of power control to
1166 use. By default "RPM" will be used. Valid values
1167 are the strings "RPM", "manual", "servoj10".
1168
1169 """
1170 ACCEPTABLE_STATES = ['ON', 'OFF']
1171
1172 if state.upper() not in ACCEPTABLE_STATES:
1173 raise error.TestError('State must be one of: %s.'
1174 % (ACCEPTABLE_STATES,))
1175
1176 if power_method == self.POWER_CONTROL_SERVO:
1177 logging.info('Setting servo port J10 to %s', state)
1178 self.servo.set('prtctl3_pwren', state.lower())
1179 time.sleep(self._USB_POWER_TIMEOUT)
1180 elif power_method == self.POWER_CONTROL_MANUAL:
1181 logging.info('You have %d seconds to set the AC power to %s.',
1182 self._POWER_CYCLE_TIMEOUT, state)
1183 time.sleep(self._POWER_CYCLE_TIMEOUT)
1184 else:
1185 if not self.has_power():
1186 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001187 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1188 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1189 hostname=self.hostname)
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001190 rpm_client.set_power(self.hostname, state.upper())
Simran Basid5e5e272012-09-24 15:23:59 -07001191
1192
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001202
1203
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1213
1214
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the Servo and manual methods the host is powered off, left
    off for _POWER_CYCLE_TIMEOUT seconds and powered on again. For
    any other method a single 'CYCLE' request is sent to the RPM
    client.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001230
1231
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id reported by
    crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the leading part is
    # used as the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.')[0].lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform_id.replace('google_', '')
1245
1246
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board is read from CHROMEOS_RELEASE_BOARD in the host's
    /etc/lsb-release and prefixed with the board label prefix.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}. The board
    # name may also begin with 'x86-' which we need to keep.
    name_parts = board.split('-')
    if 'x86' in board:
        return board_format_string % '-'.join(name_parts[0:2])
    return board_format_string % name_parts[0]
Simran Basic6f1f7a2012-10-16 10:47:46 -07001263
1264
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    The host is searched, following symlinks, for any of the files
    in _LIGHTSENSOR_FILES under _LIGHTSENSOR_SEARCH_DIR.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, file_pattern)
    try:
        # Run the search cmd following the symlinks. Stderr_tee is set to
        # None as there can be a symlink loop, but the command will still
        # execute correctly with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the possible
        # lightsensor files existed.
        return None
    return 'lightsensor'
1284
1285
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    The check is whether /sys/class/bluetooth/hci0 exists as a
    directory on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1301
1302
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions.

    @returns a list with the result of every function in
             _LABEL_FUNCTIONS that returned a true value, in order.
    """
    candidates = (label_function(self)
                  for label_function in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]