blob: baff51e8876b1ff483fd097fdc2066b9fcdd372e [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070018from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080019from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080020from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080021from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070022from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070023from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070024from autotest_lib.server import site_host_attributes
Scott Zawalski89c44dd2013-02-26 09:28:02 -050025from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070026from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080027from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070028from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080029from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070030
Richard Barnette82c35912012-11-20 10:09:10 -080031# Importing frontend.afe.models requires a full Autotest
32# installation (with the Django modules), not just the source
33# repository. Most developers won't have the full installation, so
34# the imports below will fail for them.
35#
36# The fix is to catch import exceptions, and set `models` to `None`
37# on failure. This has the side effect that
38# SiteHost._get_board_from_afe() will fail: That will manifest as
39# failures during Repair jobs leaving the DUT as "Repair Failed".
40# In practice, you can't test Repair jobs without a full
41# installation, so that kind of failure isn't expected.
try:
    # pylint: disable=W0611
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:  # pylint: disable=W0703
    # Any ordinary failure while importing the Django-backed modules
    # (not only ImportError) means the AFE models are unavailable.
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit are not swallowed at import time.
    models = None
48
Simran Basid5e5e272012-09-24 15:23:59 -070049
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080050def _make_servo_hostname(hostname):
51 host_parts = hostname.split('.')
52 host_parts[0] = host_parts[0] + '-servo'
53 return '.'.join(host_parts)
54
55
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    The servo hostname is derived from |target_hostname|.  If that
    name is in the lab zone, try to build a Servo object pointed at
    it; servod is assumed to be up and running there already.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None`
            if the servo host is not in the lab zone or the Servo
            could not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except:  # pylint: disable=W0702
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        return None
78
79
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line, using options tuned for Chrome OS.

    Overrides the default make_ssh_command.  The fixed `-o` options are:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null; we
        don't care about the keys.  Host keys change with every new
        installation, so don't waste memory/space saving them.

      - BatchMode=yes.

      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; causes SSH to ping the connection every
        180 seconds.  In conjunction with ServerAliveCountMax ensures
        that if the connection dies, Autotest will bail out quickly.
        Originally tried 60 secs, but saw frequent job ABORTS where the
        test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - Protocol=2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return the ssh command line as a single string.
    """
    ssh_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ' '.join('-o ' + option for option in ssh_options)
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_flags, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700116
117
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800118
def add_label_detector(label_function_list, label_list=None, label=None):
    """Decorator factory that collects label-detecting functions.

    The returned decorator appends the decorated function to
    `label_function_list` and hands the function back unchanged.  When
    a non-empty `label` and a `label_list` are both supplied, `label`
    is appended to `label_list` as well.

    @param label_function_list: List of label detecting functions to add
                                decorated function to.
    @param label_list: List of detectable labels to add detectable labels to.
                       (Default: None)
    @param label: Label string that is detectable by this detection function
                  (Default: None)
    @return a decorator taking the detector function.
    """
    def register_detector(func):
        """
        @param func: The function to be added as a detector.
        @return `func`, unmodified.
        """
        label_function_list.append(func)
        if label:
            if label_list is not None:
                label_list.append(label)
        return func
    return register_detector
137
138
class SiteHost(remote.RemoteHost):
    """Chromium OS specific subclass of Host."""

    # Shared autoserv command line parser; machine_install() reads
    # `options.image` from it.
    _parser = autoserv_parser.autoserv_parser

    # Time to wait for new kernel to be marked successful after
    # auto update.
    _KERNEL_UPDATE_TIMEOUT = 120

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
    # _UPDATE_REBOOT_TIMEOUT: Time to allow for reboot after AU; this
    #   time provides no allowance for the 30 second dev-mode delay,
    #   but is deliberately generous to avoid try-job failures.
    # _INSTALL_TIMEOUT: Time to allow for chromeos-install.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    BOOT_TIMEOUT = 45
    USB_BOOT_TIMEOUT = 150
    SHUTDOWN_TIMEOUT = 5
    REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
    # TODO(jrbarnette) - temporarily set this value to 2 min to allow
    # for http://crbug.com/224871.  Reset to 1 minute once that bug
    # is fixed.
    _UPDATE_REBOOT_TIMEOUT = 120
    _INSTALL_TIMEOUT = 240

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10


    # List of board names read from the 'rpm_recovery_boards' entry of
    # the CROS section of the global config (comma-separated there).
    # NOTE(review): presumably the boards eligible for RPM-based
    # recovery; the consumer is outside this part of the file.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    # NOTE(review): presumably the retry cap for RPM power cycling during
    # repair; the consumer is outside this part of the file.
    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched on the DUT after an update to distinguish a
    # lab image from other test images (see _post_update_processing()).
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # Pattern for lab hostnames, e.g. chromeos1-row2-rack3-host4.
    # NOTE(review): presumably used to decide whether a host is
    # RPM-managed; the consumer is outside this part of the file.
    _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                           'host[0-9]+')
    # File names and search root for light sensor detection.
    # NOTE(review): consumer is outside this part of the file.
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # Registries filled in at class-definition time by `label_decorator`:
    # _LABEL_FUNCTIONS collects the detector functions, and
    # _DETECTABLE_LABELS collects the label strings they can detect.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    # NOTE(review): looks like a key/attribute name related to RPM outlet
    # state; its use is outside this part of the file -- confirm.
    _RPM_OUTLET_CHANGED = 'outlet_changed'
232
J. Richard Barnette964fba02012-10-24 17:34:29 -0700233 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800234 def get_servo_arguments(args_dict):
235 """Extract servo options from `args_dict` and return the result.
236
237 Take the provided dictionary of argument options and return
238 a subset that represent standard arguments needed to
239 construct a servo object for a host. The intent is to
240 provide standard argument processing from run_remote_tests
241 for tests that require a servo to operate.
242
243 Recommended usage:
244 ~~~~~~~~
245 args_dict = utils.args_to_dict(args)
246 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
247 host = hosts.create_host(machine, servo_args=servo_args)
248 ~~~~~~~~
249
250 @param args_dict Dictionary from which to extract the servo
251 arguments.
252 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700253 servo_args = {}
254 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800255 if arg in args_dict:
256 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700257 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700258
J. Richard Barnette964fba02012-10-24 17:34:29 -0700259
260 def _initialize(self, hostname, servo_args=None, *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700261 """Initialize superclasses, and |self.servo|.
262
263 For creating the host servo object, there are three
264 possibilities: First, if the host is a lab system known to
265 have a servo board, we connect to that servo unconditionally.
266 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700267 servo features for testing, it will pass settings for
268 `servo_host`, `servo_port`, or both. If neither of these
269 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700270
271 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700272 super(SiteHost, self)._initialize(hostname=hostname,
273 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700274 # self.env is a dictionary of environment variable settings
275 # to be exported for commands run on the host.
276 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
277 # errors that might happen.
278 self.env['LIBC_FATAL_STDERR_'] = '1'
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700279 self._xmlrpc_proxy_map = {}
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800280 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700281 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700282 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700283
284
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500285 def get_repair_image_name(self):
286 """Generate a image_name from variables in the global config.
287
288 @returns a str of $board-version/$BUILD.
289
290 """
291 stable_version = global_config.global_config.get_config_value(
292 'CROS', 'stable_cros_version')
293 build_pattern = global_config.global_config.get_config_value(
294 'CROS', 'stable_build_pattern')
295 board = self._get_board_from_afe()
296 if board is None:
297 raise error.AutoservError('DUT has no board attribute, '
298 'cannot be repaired.')
299 return build_pattern % (board, stable_version)
300
301
302 def clear_cros_version_labels_and_job_repo_url(self):
303 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalskieadbf702013-03-14 09:23:06 -0400304 try:
305 host_model = models.Host.objects.get(hostname=self.hostname)
306 except models.Host.DoesNotExist:
307 return
308
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500309 for label in host_model.labels.iterator():
310 if not label.name.startswith(ds_constants.VERSION_PREFIX):
311 continue
Dan Shi0f466e82013-02-22 15:44:58 -0800312
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500313 label.host_set.remove(host_model)
314
315 host_model.set_or_delete_attribute('job_repo_url', None)
316
317
Scott Zawalskieadbf702013-03-14 09:23:06 -0400318 def add_cros_version_labels_and_job_repo_url(self, image_name):
319 """Add cros_version labels and host attribute job_repo_url.
320
321 @param image_name: The name of the image e.g.
322 lumpy-release/R27-3837.0.0
323 """
324 try:
325 host_model = models.Host.objects.get(hostname=self.hostname)
326 except models.Host.DoesNotExist:
327 return
328 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
329 devserver_url = dev_server.ImageServer.resolve(image_name).url()
330 try:
331 label_model = models.Label.objects.get(name=cros_label)
332 except models.Label.DoesNotExist:
333 label_model = models.Label.objects.create(name=cros_label)
334 host_model.labels.add(label_model)
335 repo_url = tools.get_package_url(devserver_url, image_name)
336 host_model.set_or_delete_attribute('job_repo_url', repo_url)
337
338
Dan Shi0f466e82013-02-22 15:44:58 -0800339 def _try_stateful_update(self, update_url, force_update, updater):
340 """Try to use stateful update to initialize DUT.
341
342 When DUT is already running the same version that machine_install
343 tries to install, stateful update is a much faster way to clean up
344 the DUT for testing, compared to a full reimage. It is implemeted
345 by calling autoupdater.run_update, but skipping updating root, as
346 updating the kernel is time consuming and not necessary.
347
348 @param update_url: url of the image.
349 @param force_update: Set to True to update the image even if the DUT
350 is running the same version.
351 @param updater: ChromiumOSUpdater instance used to update the DUT.
352 @returns: True if the DUT was updated with stateful update.
353
354 """
355 if not updater.check_version():
356 return False
357 if not force_update:
358 logging.info('Canceling stateful update because the new and '
359 'old versions are the same.')
360 return False
361 # Following folders should be rebuilt after stateful update.
362 # A test file is used to confirm each folder gets rebuilt after
363 # the stateful update.
364 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
365 test_file = '.test_file_to_be_deleted'
366 for folder in folders_to_check:
367 touch_path = os.path.join(folder, test_file)
368 self.run('touch %s' % touch_path)
369
370 if not updater.run_update(force_update=True, update_root=False):
371 return False
372
373 # Reboot to complete stateful update.
J. Richard Barnettecb934ac2013-04-01 14:27:54 -0700374 self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800375 check_file_cmd = 'test -f %s; echo $?'
376 for folder in folders_to_check:
377 test_file_path = os.path.join(folder, test_file)
378 result = self.run(check_file_cmd % test_file_path,
379 ignore_status=True)
380 if result.exit_status == 1:
381 return False
382 return True
383
384
385 def _post_update_processing(self, updater, inactive_kernel=None):
386 """After the DUT is updated, confirm machine_install succeeded.
387
388 @param updater: ChromiumOSUpdater instance used to update the DUT.
389 @param inactive_kernel: kernel state of inactive kernel before reboot.
390
391 """
392
393 # Touch the lab machine file to leave a marker that distinguishes
394 # this image from other test images.
395 self.run('touch %s' % self._LAB_MACHINE_FILE)
396
397 # Kick off the autoreboot script as the _LAB_MACHINE_FILE was
398 # missing on the first boot.
399 self.run('start autoreboot')
400
401 # Following the reboot, verify the correct version.
Dan Shib95bb862013-03-22 16:29:28 -0700402 if not updater.check_version_to_confirm_install():
Dan Shi0f466e82013-02-22 15:44:58 -0800403 # Print out crossystem to make it easier to debug the rollback.
404 logging.debug('Dumping partition table.')
Dan Shi346725f2013-03-20 15:22:38 -0700405 self.run('cgpt show $(rootdev -s -d)')
Dan Shi0f466e82013-02-22 15:44:58 -0800406 logging.debug('Dumping crossystem for firmware debugging.')
Dan Shi346725f2013-03-20 15:22:38 -0700407 self.run('crossystem --all')
Dan Shi0f466e82013-02-22 15:44:58 -0800408 logging.error('Expected Chromium OS version: %s. '
409 'Found Chromium OS %s',
Dan Shi346725f2013-03-20 15:22:38 -0700410 updater.update_version, updater.get_build_id())
411 raise autoupdater.ChromiumOSError('Updater failed on host %s' %
412 self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800413
414 # Figure out newly active kernel.
415 new_active_kernel, _ = updater.get_kernel_state()
416
417 # Ensure that previously inactive kernel is now the active kernel.
418 if inactive_kernel and new_active_kernel != inactive_kernel:
419 raise autoupdater.ChromiumOSError(
420 'Update failed. New kernel partition is not active after'
421 ' boot.')
422
Scott Zawalskieadbf702013-03-14 09:23:06 -0400423 try:
424 host_attributes = site_host_attributes.HostAttributes(self.hostname)
425 except models.Host.DoesNotExist:
426 host_attributes = None
427 if host_attributes and host_attributes.has_chromeos_firmware:
Dan Shi0f466e82013-02-22 15:44:58 -0800428 # Wait until tries == 0 and success, or until timeout.
429 utils.poll_for_condition(
430 lambda: (updater.get_kernel_tries(new_active_kernel) == 0
431 and updater.get_kernel_success(new_active_kernel)),
432 exception=autoupdater.ChromiumOSError(
433 'Update failed. Timed out waiting for system to mark'
434 ' new kernel as successful.'),
435 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
436
437
Scott Zawalskieadbf702013-03-14 09:23:06 -0400438 def _stage_build_and_return_update_url(self, image_name):
439 """Stage a build on a devserver and return the update_url.
440
441 @param image_name: a name like lumpy-release/R27-3837.0.0
442 @returns an update URL like:
443 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
444 """
445 logging.info('Staging requested build: %s', image_name)
446 devserver = dev_server.ImageServer.resolve(image_name)
447 devserver.trigger_download(image_name, synchronous=False)
448 return tools.image_url_pattern() % (devserver.url(), image_name)
449
450
    def machine_install(self, update_url=None, force_update=False,
                        local_devserver=False, repair=False):
        """Install the DUT.

        Use stateful update if the DUT is already running the same build.
        Stateful update does not update kernel and tends to run much faster
        than a full reimage. If the DUT is running a different build, or it
        failed to do a stateful update, full update, including kernel update,
        will be applied to the DUT.

        Once a host enters machine_install its cros_version label will be
        removed as well as its host attribute job_repo_url (used for
        package install).

        The update URL is chosen as follows: an explicit `update_url`
        wins; otherwise the autoserv `image` option is used (staged on a
        devserver unless it already starts with 'http://'); otherwise, in
        repair mode, the stable repair image is staged; otherwise the
        call fails.

        @param update_url: The url to use for the update
                pattern: http://$devserver:###/update/$build
                If update_url is None and repair is True we will install the
                stable image listed in global_config under
                CROS.stable_cros_version.
        @param force_update: Force an update even if the version installed
                is the same. Default:False
        @param local_devserver: Used by run_remote_test to allow people to
                use their local devserver. Default: False
        @param repair: Whether or not we are in repair mode. This adds special
                cases for repairing a machine like starting update_engine.
                Setting repair to True sets force_update to True as well.
                default: False
        @raises autoupdater.ChromiumOSError

        """
        if not update_url and self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
            else:
                # Try to stage any build that does not start with http:// on
                # the devservers defined in global_config.ini.
                update_url = self._stage_build_and_return_update_url(
                        requested_build)
        elif not update_url and not repair:
            raise autoupdater.ChromiumOSError(
                'Update failed. No update URL provided.')
        elif not update_url and repair:
            update_url = self._stage_build_and_return_update_url(
                    self.get_repair_image_name())

        if repair:
            # In case the system is in a bad state, we always reboot the machine
            # before machine_install.
            self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT, wait=True)
            self.run('stop update-engine; start update-engine')
            force_update = True

        updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                                local_devserver=local_devserver)
        updated = False
        # Remove cros-version and job_repo_url host attribute from host.
        self.clear_cros_version_labels_and_job_repo_url()
        # If the DUT is already running the same build, try stateful update
        # first. Stateful update does not update kernel and tends to run much
        # faster than a full reimage.
        try:
            updated = self._try_stateful_update(update_url, force_update,
                                                updater)
            if updated:
                logging.info('DUT is updated with stateful update.')
        except Exception as e:
            # Any failure of the stateful path is only logged; the full
            # update below is the fallback.
            logging.exception(e)
            logging.warn('Failed to stateful update DUT, force to update.')

        # Stays None when the stateful path succeeded, which makes
        # _post_update_processing() skip the active-kernel comparison.
        inactive_kernel = None
        # Do a full update if stateful update is not applicable or failed.
        if not updated:
            # In case the system is in a bad state, we always reboot the
            # machine before machine_install.
            # NOTE(review): in repair mode the DUT was already rebooted and
            # update-engine restarted above, so both happen twice on this
            # path -- confirm that is intended.
            self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT, wait=True)

            # TODO(sosa): Remove temporary hack to get rid of bricked machines
            # that can't update due to a corrupted policy.
            self.run('rm -rf /var/lib/whitelist')
            self.run('touch /var/lib/whitelist')
            self.run('chmod -w /var/lib/whitelist')
            self.run('stop update-engine; start update-engine')

            # NOTE(review): if run_update() returns a false value, nothing
            # is raised here; the method skips verification and falls
            # through to the autotest directory cleanup -- confirm callers
            # expect a silent no-op in that case.
            if updater.run_update(force_update):
                updated = True
                # Figure out active and inactive kernel.
                active_kernel, inactive_kernel = updater.get_kernel_state()

                # Ensure inactive kernel has higher priority than active.
                if (updater.get_kernel_priority(inactive_kernel)
                        < updater.get_kernel_priority(active_kernel)):
                    raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

                update_engine_log = '/var/log/update_engine.log'
                logging.info('Dumping %s', update_engine_log)
                self.run('cat %s' % update_engine_log)
                # Updater has returned successfully; reboot the host.
                self.reboot(timeout=self._UPDATE_REBOOT_TIMEOUT,
                            wait=True)

        if updated:
            # Verify the installed image, then re-add the version label
            # and job_repo_url that were cleared above.
            self._post_update_processing(updater, inactive_kernel)
            image_name = autoupdater.url_to_image_name(update_url)
            self.add_cros_version_labels_and_job_repo_url(image_name)

        # Clean up any old autotest directories which may be lying around.
        for path in global_config.global_config.get_config_value(
                'AUTOSERV', 'client_autodir_paths', type=list):
            self.run('rm -rf ' + path)
564
565
Simran Basi833814b2013-01-29 13:13:43 -0800566 def _get_label_from_afe(self, label_prefix):
567 """Retrieve a host's specific label from the AFE.
568
569 Looks for a host label that has the form <label_prefix>:<value>
570 and returns the "<value>" part of the label. None is returned
571 if there is not a label matching the pattern
572
573 @returns the label that matches the prefix or 'None'
574 """
575 host_model = models.Host.objects.get(hostname=self.hostname)
576 host_label = host_model.labels.get(name__startswith=label_prefix)
577 if not host_label:
578 return None
579 return host_label.name.split(label_prefix, 1)[1]
580
581
Richard Barnette82c35912012-11-20 10:09:10 -0800582 def _get_board_from_afe(self):
583 """Retrieve this host's board from its labels in the AFE.
584
585 Looks for a host label of the form "board:<board>", and
586 returns the "<board>" part of the label. `None` is returned
587 if there is not a single, unique label matching the pattern.
588
589 @returns board from label, or `None`.
590 """
Simran Basi833814b2013-01-29 13:13:43 -0800591 return self._get_label_from_afe(ds_constants.BOARD_PREFIX)
592
593
594 def get_build(self):
595 """Retrieve the current build for this Host from the AFE.
596
597 Looks through this host's labels in the AFE to determine its build.
598
599 @returns The current build or None if it could not find it or if there
600 were multiple build labels assigned to this host.
601 """
602 return self._get_label_from_afe(ds_constants.VERSION_PREFIX)
Richard Barnette82c35912012-11-20 10:09:10 -0800603
604
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500605 def _install_repair(self):
606 """Attempt to repair this host using upate-engine.
607
608 If the host is up, try installing the DUT with a stable
609 "repair" version of Chrome OS as defined in the global_config
610 under CROS.stable_cros_version.
611
612 @returns True if successful, False if update_engine failed.
613
614 """
615 if not self.is_up():
616 return False
617
618 logging.info('Attempting to reimage machine to repair image.')
619 try:
620 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700621 except autoupdater.ChromiumOSError as e:
622 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500623 logging.info('Repair via install failed.')
624 return False
625
626 return True
627
628
def servo_repair(self, image_url):
    """Re-install the OS on the DUT by way of its attached Servo.

    The sequence is:
      1. Have the servo install the image at `image_url` as a
         recovery image and wait for the DUT to come up.
      2. Run `chromeos-install` on the DUT.
      3. Long-press power, switch the USB key to 'host', short-press
         power, and wait for the DUT to come up again.

    @param image_url URL from which to download the test image to
                     be installed on the DUT.

    @exception AutoservError The DUT did not come up within
                             `USB_BOOT_TIMEOUT` after step 1, or
                             within `BOOT_TIMEOUT` after step 3.

    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        usb_failure = ('DUT failed to boot from USB after %d seconds' %
                       self.USB_BOOT_TIMEOUT)
        raise error.AutoservError(usb_failure)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)

    # Power down, hand the USB key back to the servo host, and
    # power up again.
    self.servo.power_long_press()
    self.servo.switch_usbkey('host')
    self.servo.power_short_press()

    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        boot_failure = ('DUT failed to reboot installed test image '
                        'after %d seconds' % self.BOOT_TIMEOUT)
        raise error.AutoservError(boot_failure)
655
656
Richard Barnette82c35912012-11-20 10:09:10 -0800657 def _powercycle_to_repair(self):
658 """Utilize the RPM Infrastructure to bring the host back up.
659
660 If the host is not up/repaired after the first powercycle we utilize
661 auto fallback to the last good install by powercycling and rebooting the
662 host 6 times.
663 """
664 logging.info('Attempting repair via RPM powercycle.')
665 failed_cycles = 0
666 self.power_cycle()
667 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
668 failed_cycles += 1
669 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
670 raise error.AutoservError('Powercycled host %s %d times; '
671 'device did not come back online.' %
672 (self.hostname, failed_cycles))
673 self.power_cycle()
674 if failed_cycles == 0:
675 logging.info('Powercycling was successful first time.')
676 else:
677 logging.info('Powercycling was successful after %d failures.',
678 failed_cycles)
679
680
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If that fails and there's a servo for the DUT that
         supports recovery, try to re-install via the servo.
      3. Otherwise, if the DUT can be power-cycled via RPM and its
         board is in `_RPM_RECOVERY_BOARDS`, try to repair by
         power-cycling.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.

    @exception AutoservError The host has no board label in the
                             AFE, or auto-update failed and neither
                             servo nor RPM repair is available.

    """
    host_board = self._get_board_from_afe()
    if host_board is None:
        # A bare `raise` is not valid here because no exception is
        # being handled; raise an explicit repair failure instead.
        logging.error('host %s has no board; failing repair',
                      self.hostname)
        raise error.AutoservError(
                'host %s has no board; failing repair' % self.hostname)

    if not self._install_repair():
        # TODO(scottz): All repair pathways should be
        # executed until we've exhausted all options. Below
        # we favor servo over powercycle when we really
        # should be falling back to power if servo fails.
        if self.servo and self.servo.recovery_supported():
            self.servo_repair(
                    dev_server.ImageServer.devserver_url_for_servo(
                            host_board))
        elif (self.has_power() and
              host_board in self._RPM_RECOVERY_BOARDS):
            self._powercycle_to_repair()
        else:
            logging.error('host %s has no servo and no RPM control; '
                          'failing repair', self.hostname)
            raise error.AutoservError(
                    'host %s has no servo and no RPM control; '
                    'failing repair' % self.hostname)
    self.verify()
Richard Barnette82c35912012-11-20 10:09:10 -0800725
726
def close(self):
    """Close the host, then drop every XMLRPC connection it opened.

    The parent class `close()` runs first; afterwards
    `xmlrpc_disconnect_all()` disconnects every port recorded by
    `xmlrpc_connect()`.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
730
731
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE reports this host with the
    `_RPM_OUTLET_CHANGED` attribute.  Otherwise power is turned on
    and the attribute is set to None in the AFE (presumably
    clearing it).  An RPM failure is logged, not raised.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts:
        return
    if self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # Cleanup itself has finished; a problem in the RPM
        # Infrastructure is reported but must not fail cleanup.
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
750
751
def cleanup(self):
    """Clean up the host after a test, preferring a UI restart.

    Removes the "cleanup logs paused" marker file, then clears the
    login prompt state, restarts the UI, and waits for the login
    prompt.  If any of those steps fails, falls back to the parent
    class cleanup.  Finally, for hosts that `has_power()` reports
    as RPM-powered, runs `_cleanup_poweron()`.
    """
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warn('Unable to restart ui, rebooting device.')
        # The UI restart failed, so use the normal Autotest cleanup
        # routines instead, i.e. reboot the machine.
        super(SiteHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700769
770
def reboot(self, **dargs):
    """Reboot the site host with Chrome OS specific defaults.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds.  This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.

    Values for 'reboot_cmd' and 'fastsync' supplied by the caller
    take precedence over the defaults set here.

    @param dargs Keyword arguments passed on to the parent class
                 `reboot()`.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
786
787
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
      1. All conditions tested by the parent version of this
         function.
      2. Sufficient space in /mnt/stateful_partition.
      3. Sufficient space in /mnt/stateful_partition/encrypted.
      4. `update_engine_client --status` runs successfully.
      5. python can import cPickle.

    The required space for each partition is read from the SERVER
    section of the global config.

    """
    super(SiteHost, self).verify_software()

    config = global_config.global_config
    # (path, config key, default requirement passed to the lookup)
    space_checks = (
        ('/mnt/stateful_partition', 'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for path, config_key, default_gb in space_checks:
        required_gb = config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(path, required_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700815
816
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready, and throw a TestError if the server isn't
    ready by timeout_seconds.

    Any connection previously made for `port` is disconnected
    first.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'  Will throw a
        TestError if server is not ready in time.

    @returns an `xmlrpclib.ServerProxy` that reaches the server
             through a local ssh tunnel.

    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    # Record the tunnel as soon as it exists.  If starting the
    # remote server below raises, the tunnel stays known to
    # xmlrpc_disconnect() / xmlrpc_disconnect_all() rather than
    # being orphaned.
    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    rpc_url = 'http://localhost:%d' % local_port
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        except retry.TimeoutException:
            raise error.TestError('Unable to start XMLRPC server after '
                                  '%d seconds.' % timeout_seconds)
        finally:
            # Tear down on any failure, not only on timeout.
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.xmlrpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700908
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`, provided a command name was recorded for
    it.  Also closes the local ssh tunnel created for the
    connection to the host.  This function does not directly
    alter the state of a previously returned XMLRPC client
    object; however disconnection will cause all subsequent
    calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    if port not in self._xmlrpc_proxy_map:
        return

    remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]
    if remote_name:
        # The server is located with 'pkill' by name rather than
        # by PID: the host may have rebooted since connecting, and
        # an unrelated process could now own the old PID.
        #
        # 'pkill' exits with status 1 when nothing matches, which
        # would make run() raise; the status is ignored to avoid
        # that.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    tunnel_running = tunnel_proc.poll() is None
    if tunnel_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
949
950
def xmlrpc_disconnect_all(self):
    """Disconnect every port recorded in the XMLRPC proxy map."""
    # Work from a snapshot of the ports: xmlrpc_disconnect()
    # removes entries from the map as it goes.
    connected_ports = list(self._xmlrpc_proxy_map)
    for port in connected_ports:
        self.xmlrpc_disconnect(port)
955
956
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    The host is pinged once with a one-second deadline; a ping
    result of 0 counts as "responding".

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_result = utils.ping(self.hostname, tries=1, deadline=1)
    responded = (ping_result == 0)
    # XOR is true exactly when the two values disagree.
    disagrees = status ^ responded
    return not disagrees
967
968 def _ping_wait_for_status(self, status, timeout):
969 """Wait for the host to have a given status (UP or DOWN).
970
971 Status is checked by polling. Polling will not last longer
972 than the number of seconds in `timeout`. The polling
973 interval will be long enough that only approximately
974 _PING_WAIT_COUNT polling cycles will be executed, subject
975 to a maximum interval of about one minute.
976
977 @param status Waiting will stop immediately if `ping` of the
978 host returns this status.
979 @param timeout Poll for at most this many seconds.
980 @return True iff the host status from `ping` matched the
981 requested status at the time of return.
982
983 """
984 # _ping_check_status() takes about 1 second, hence the
985 # "- 1" in the formula below.
986 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
987 end_time = time.time() + timeout
988 while time.time() <= end_time:
989 if self._ping_check_status(status):
990 return True
991 if poll_interval > 0:
992 time.sleep(poll_interval)
993
994 # The last thing we did was sleep(poll_interval), so it may
995 # have been too long since the last `ping`. Check one more
996 # time, just to be sure.
997 return self._ping_check_status(status)
998
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B.  This is not a reliable substitute for `wait_up()`: a
    host that responds to ping will not necessarily respond to
    ssh.  Use it only when the target DUT can be considered
    functional even if it can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001014
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001028
1029 def test_wait_for_sleep(self):
1030 """Wait for the client to enter low-power sleep mode.
1031
1032 The test for "is asleep" can't distinguish a system that is
1033 powered off; to confirm that the unit was asleep, it is
1034 necessary to force resume, and then call
1035 `test_wait_for_resume()`.
1036
1037 This function is expected to be called from a test as part
1038 of a sequence like the following:
1039
1040 ~~~~~~~~
1041 boot_id = host.get_boot_id()
1042 # trigger sleep on the host
1043 host.test_wait_for_sleep()
1044 # trigger resume on the host
1045 host.test_wait_for_resume(boot_id)
1046 ~~~~~~~~
1047
1048 @exception TestFail The host did not go to sleep within
1049 the allowed time.
1050 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001051 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001052 raise error.TestFail(
1053 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001054 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001055
1056
1057 def test_wait_for_resume(self, old_boot_id):
1058 """Wait for the client to resume from low-power sleep mode.
1059
1060 The `old_boot_id` parameter should be the value from
1061 `get_boot_id()` obtained prior to entering sleep mode. A
1062 `TestFail` exception is raised if the boot id changes.
1063
1064 See @ref test_wait_for_sleep for more on this function's
1065 usage.
1066
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001067 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001068 target host went to sleep.
1069
1070 @exception TestFail The host did not respond within the
1071 allowed time.
1072 @exception TestFail The host responded, but the boot id test
1073 indicated a reboot rather than a sleep
1074 cycle.
1075 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001076 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001077 raise error.TestFail(
1078 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001079 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001080 else:
1081 new_boot_id = self.get_boot_id()
1082 if new_boot_id != old_boot_id:
1083 raise error.TestFail(
1084 'client rebooted, but sleep was expected'
1085 ' (old boot %s, new boot %s)'
1086 % (old_boot_id, new_boot_id))
1087
1088
1089 def test_wait_for_shutdown(self):
1090 """Wait for the client to shut down.
1091
1092 The test for "has shut down" can't distinguish a system that
1093 is merely asleep; to confirm that the unit was down, it is
1094 necessary to force boot, and then call test_wait_for_boot().
1095
1096 This function is expected to be called from a test as part
1097 of a sequence like the following:
1098
1099 ~~~~~~~~
1100 boot_id = host.get_boot_id()
1101 # trigger shutdown on the host
1102 host.test_wait_for_shutdown()
1103 # trigger boot on the host
1104 host.test_wait_for_boot(boot_id)
1105 ~~~~~~~~
1106
1107 @exception TestFail The host did not shut down within the
1108 allowed time.
1109 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001110 if not self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001111 raise error.TestFail(
1112 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001113 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001114
1115
1116 def test_wait_for_boot(self, old_boot_id=None):
1117 """Wait for the client to boot from cold power.
1118
1119 The `old_boot_id` parameter should be the value from
1120 `get_boot_id()` obtained prior to shutting down. A
1121 `TestFail` exception is raised if the boot id does not
1122 change. The boot id test is omitted if `old_boot_id` is not
1123 specified.
1124
1125 See @ref test_wait_for_shutdown for more on this function's
1126 usage.
1127
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001128 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001129 shut down.
1130
1131 @exception TestFail The host did not respond within the
1132 allowed time.
1133 @exception TestFail The host responded, but the boot id test
1134 indicated that there was no reboot.
1135 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001136 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001137 raise error.TestFail(
1138 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001139 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001140 elif old_boot_id:
1141 if self.get_boot_id() == old_boot_id:
1142 raise error.TestFail(
1143 'client is back up, but did not reboot'
1144 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001145
1146
@staticmethod
def check_for_rpm_support(hostname):
    """Test a hostname against the naming format for RPM powered DUTs.

    The hostname is matched against `SiteHost._RPM_HOSTNAME_REGEX`
    with `re.match()`.

    @param hostname Name of the host to be checked.
    @return None if the hostname does not follow the defined naming
            format for RPM powered DUT's in the lab.  If it does
            follow the format, a regular expression MatchObject is
            returned instead.
    """
    rpm_hostname_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001156
1157
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return the result of `check_for_rpm_support()` for this host's
            hostname: a MatchObject (truthy) if the hostname follows
            the defined naming format, None otherwise.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
1165
1166
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001167 def _set_power(self, state, power_method):
1168 """Sets the power to the host via RPM, Servo or manual.
1169
1170 @param state Specifies which power state to set to DUT
1171 @param power_method Specifies which method of power control to
1172 use. By default "RPM" will be used. Valid values
1173 are the strings "RPM", "manual", "servoj10".
1174
1175 """
1176 ACCEPTABLE_STATES = ['ON', 'OFF']
1177
1178 if state.upper() not in ACCEPTABLE_STATES:
1179 raise error.TestError('State must be one of: %s.'
1180 % (ACCEPTABLE_STATES,))
1181
1182 if power_method == self.POWER_CONTROL_SERVO:
1183 logging.info('Setting servo port J10 to %s', state)
1184 self.servo.set('prtctl3_pwren', state.lower())
1185 time.sleep(self._USB_POWER_TIMEOUT)
1186 elif power_method == self.POWER_CONTROL_MANUAL:
1187 logging.info('You have %d seconds to set the AC power to %s.',
1188 self._POWER_CYCLE_TIMEOUT, state)
1189 time.sleep(self._POWER_CYCLE_TIMEOUT)
1190 else:
1191 if not self.has_power():
1192 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001193 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1194 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1195 hostname=self.hostname)
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001196 rpm_client.set_power(self.hostname, state.upper())
Simran Basid5e5e272012-09-24 15:23:59 -07001197
1198
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use.  Defaults to `POWER_CONTROL_RPM`.
                        Valid values are documented as the strings
                        "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001208
1209
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    @param power_method Specifies which method of power control to
                        use.  Defaults to `POWER_CONTROL_RPM`.
                        Valid values are documented as the strings
                        "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1219
1220
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the host is powered off, the
    call sleeps for `_POWER_CYCLE_TIMEOUT`, and the host is powered
    on again.  For any other method a single 'CYCLE' request is
    sent through `rpm_client`.

    @param power_method Specifies which method of power control to
                        use.  Defaults to `POWER_CONTROL_RPM`.
                        Valid values are documented as the strings
                        "RPM", "manual", "servoj10".

    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001236
1237
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id (fwid) reported by
    crossystem: the part before the first '.', lower-cased, with
    any 'google_' removed.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z, so the
    # leading field is the platform id.
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0]
    platform = platform_id.lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform.replace('google_', '')
1251
1252
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board is read from CHROMEOS_RELEASE_BOARD in the host's
    /etc/lsb-release and prefixed with `ds_constants.BOARD_PREFIX`.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}.  The board
    # name may also begin with 'x86-' which we need to keep, so such
    # names retain their first two dash-separated fields.
    name_fields = board.split('-')
    if 'x86' in board:
        board_name = '-'.join(name_fields[0:2])
    else:
        board_name = name_fields[0]
    return board_format_string % board_name
Simran Basic6f1f7a2012-10-16 10:47:46 -07001269
1270
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Searches `_LIGHTSENSOR_SEARCH_DIR` on the host, following
    symlinks, for any of the names in `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    wanted_files = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, wanted_files)
    try:
        # The search follows symlinks.  Stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr
        # even though the command still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the
        # possible lightsensor files existed.
        return None
    else:
        return 'lightsensor'
1290
1291
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks for the directory /sys/class/bluetooth/hci0 on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did
        # not exist.
        return None
    else:
        # test exited with a return code of 0.
        return 'bluetooth'
1307
1308
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in `_LABEL_FUNCTIONS` is called with this
    host, and each truthy result is kept, in order.

    @returns a list of the labels produced by the label functions.
    """
    candidates = (label_function(self)
                  for label_function in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]