blob: af5c812e6542d66e158227caa4fafc13303b1aeb [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
J. Richard Barnette1d78b012012-05-15 13:56:30 -07006import logging
Simran Basid5e5e272012-09-24 15:23:59 -07007import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07009import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070010import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070011
J. Richard Barnette45e93de2012-04-11 17:24:15 -070012from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080013from autotest_lib.client.common_lib import error
14from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080016from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette82c35912012-11-20 10:09:10 -080017from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070018from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070019from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070020from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070021from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070022from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080023from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070024
Richard Barnette82c35912012-11-20 10:09:10 -080025# Importing frontend.afe.models requires a full Autotest
26# installation (with the Django modules), not just the source
27# repository. Most developers won't have the full installation, so
28# the imports below will fail for them.
29#
30# The fix is to catch import exceptions, and set `models` to `None`
31# on failure. This has the side effect that
32# SiteHost._get_board_from_afe() will fail: That will manifest as
33# failures during Repair jobs leaving the DUT as "Repair Failed".
34# In practice, you can't test Repair jobs without a full
35# installation, so that kind of failure isn't expected.
try:
    # pylint: disable=W0611
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:  # pylint: disable=W0703
    # Without a full Autotest installation these imports fail; fall
    # back to `None` so the rest of the module stays importable.
    # `Exception` (rather than a bare `except`) keeps the fallback
    # broad while still letting KeyboardInterrupt and SystemExit
    # propagate.
    models = None
42
Simran Basid5e5e272012-09-24 15:23:59 -070043
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080044def _make_servo_hostname(hostname):
45 host_parts = hostname.split('.')
46 host_parts[0] = host_parts[0] + '-servo'
47 return '.'.join(host_parts)
48
49
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None` if
            the servo host is not in the lab zone or the Servo object
            could not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception as e:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # The failure is still logged so that it is not lost, and
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.warning('Unable to create servo for %s: %s; '
                        'continuing without servo.', servo_host, e)
    return None
72
73
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to reach Chrome OS devices.

    This overrides the default make_ssh_command with a fixed set of
    options tuned for Chrome OS:

      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null; we
        don't care about the keys.  Host keys change with every new
        installation, so don't waste memory/space saving them.

      - BatchMode=yes.

      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; causes SSH to ping the connection
        every 180 seconds.  In conjunction with ServerAliveCountMax
        this ensures that if the connection dies, Autotest will bail
        out quickly.  Originally tried 60 secs, but saw frequent job
        ABORTS where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - Protocol=2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return the ssh command line (without a target host) as a string.
    """
    ssh_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ' '.join('-o ' + option for option in ssh_options)
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_flags, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700110
111
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800112
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label detection functions.

    The returned decorator appends the decorated function to
    `label_function_list` and hands the function back unchanged.  If
    a non-empty `label` is given and `label_list` is not `None`, the
    label is also appended to `label_list` each time the decorator
    is applied.

    @param label_function_list: List of label detecting functions to add
                                decorated function to.
    @param label_list: List of detectable labels to add detectable labels to.
                       (Default: None)
    @param label: Label string that is detectable by this detection function
                  (Default: None)
    @return the registering decorator.
    """
    record_label = bool(label) and label_list is not None

    def register(detector):
        """
        @param detector: The function to be added as a detector.
        @return `detector`, unmodified.
        """
        label_function_list.append(detector)
        if record_label:
            label_list.append(label)
        return detector

    return register
131
132
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700133class SiteHost(remote.RemoteHost):
134 """Chromium OS specific subclass of Host."""
135
136 _parser = autoserv_parser.autoserv_parser
137
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800138 # Time to wait for new kernel to be marked successful after
139 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700140 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700141
Richard Barnette03a0c132012-11-05 12:40:35 -0800142 # Timeout values (in seconds) associated with various Chrome OS
143 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700144 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800145 # In general, a good rule of thumb is that the timeout can be up
146 # to twice the typical measured value on the slowest platform.
147 # The times here have not necessarily been empirically tested to
148 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700149 #
150 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800151 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
152 # time to restart the netwowrk.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700153 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800154 # other things, this must account for the 30 second dev-mode
155 # screen delay and time to start the network,
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700156 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800157 # including the 30 second dev-mode delay and time to start the
158 # network,
159 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700160 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
Richard Barnette03a0c132012-11-05 12:40:35 -0800161 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700162
163 SLEEP_TIMEOUT = 2
164 RESUME_TIMEOUT = 5
165 BOOT_TIMEOUT = 45
166 USB_BOOT_TIMEOUT = 150
167 SHUTDOWN_TIMEOUT = 5
168 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
Richard Barnette03a0c132012-11-05 12:40:35 -0800169 _INSTALL_TIMEOUT = 240
170
171 _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
172 '%(board)s_test_image.bin')
173
174 # TODO(jrbarnette): Servo repair is restricted to x86-alex,
175 # because the existing servo client code won't work on other
176 # boards. http://crosbug.com/36973
177 _SERVO_REPAIR_WHITELIST = [ 'x86-alex' ]
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800178
179
Richard Barnette82c35912012-11-20 10:09:10 -0800180 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
181 'rpm_recovery_boards', type=str).split(',')
182
183 _MAX_POWER_CYCLE_ATTEMPTS = 6
184 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
185 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
186 'host[0-9]+')
187 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
188 'in_illuminance0_raw',
189 'illuminance0_input']
190 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
191 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800192 _DETECTABLE_LABELS = []
193 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
194 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700195
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800196
J. Richard Barnette964fba02012-10-24 17:34:29 -0700197 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800198 def get_servo_arguments(args_dict):
199 """Extract servo options from `args_dict` and return the result.
200
201 Take the provided dictionary of argument options and return
202 a subset that represent standard arguments needed to
203 construct a servo object for a host. The intent is to
204 provide standard argument processing from run_remote_tests
205 for tests that require a servo to operate.
206
207 Recommended usage:
208 ~~~~~~~~
209 args_dict = utils.args_to_dict(args)
210 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
211 host = hosts.create_host(machine, servo_args=servo_args)
212 ~~~~~~~~
213
214 @param args_dict Dictionary from which to extract the servo
215 arguments.
216 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700217 servo_args = {}
218 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800219 if arg in args_dict:
220 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700221 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700222
J. Richard Barnette964fba02012-10-24 17:34:29 -0700223
224 def _initialize(self, hostname, servo_args=None, *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700225 """Initialize superclasses, and |self.servo|.
226
227 For creating the host servo object, there are three
228 possibilities: First, if the host is a lab system known to
229 have a servo board, we connect to that servo unconditionally.
230 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700231 servo features for testing, it will pass settings for
232 `servo_host`, `servo_port`, or both. If neither of these
233 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700234
235 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700236 super(SiteHost, self)._initialize(hostname=hostname,
237 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700238 # self.env is a dictionary of environment variable settings
239 # to be exported for commands run on the host.
240 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
241 # errors that might happen.
242 self.env['LIBC_FATAL_STDERR_'] = '1'
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700243 self._xmlrpc_proxy_map = {}
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800244 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700245 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700246 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700247
248
Chris Sosaa3ac2152012-05-23 22:23:13 -0700249 def machine_install(self, update_url=None, force_update=False,
250 local_devserver=False):
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700251 if not update_url and self._parser.options.image:
252 update_url = self._parser.options.image
253 elif not update_url:
254 raise autoupdater.ChromiumOSError(
255 'Update failed. No update URL provided.')
256
Chris Sosafab08082013-01-04 15:21:20 -0800257 # In case the system is in a bad state, we always reboot the machine
258 # before machine_install.
259 self.reboot(timeout=60, wait=True)
260
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700261 # Attempt to update the system.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700262 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
263 local_devserver=local_devserver)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700264 if updater.run_update(force_update):
265 # Figure out active and inactive kernel.
266 active_kernel, inactive_kernel = updater.get_kernel_state()
267
268 # Ensure inactive kernel has higher priority than active.
269 if (updater.get_kernel_priority(inactive_kernel)
270 < updater.get_kernel_priority(active_kernel)):
271 raise autoupdater.ChromiumOSError(
272 'Update failed. The priority of the inactive kernel'
273 ' partition is less than that of the active kernel'
274 ' partition.')
275
Scott Zawalski21902002012-09-19 17:57:00 -0400276 update_engine_log = '/var/log/update_engine.log'
277 logging.info('Dumping %s', update_engine_log)
278 self.run('cat %s' % update_engine_log)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800279 # Updater has returned successfully; reboot the host.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700280 self.reboot(timeout=60, wait=True)
Chris Sosae146ed82012-09-19 17:58:36 -0700281 # Touch the lab machine file to leave a marker that distinguishes
282 # this image from other test images.
Richard Barnette82c35912012-11-20 10:09:10 -0800283 self.run('touch %s' % self._LAB_MACHINE_FILE)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700284
285 # Following the reboot, verify the correct version.
286 updater.check_version()
287
288 # Figure out newly active kernel.
289 new_active_kernel, _ = updater.get_kernel_state()
290
291 # Ensure that previously inactive kernel is now the active kernel.
292 if new_active_kernel != inactive_kernel:
293 raise autoupdater.ChromiumOSError(
294 'Update failed. New kernel partition is not active after'
295 ' boot.')
296
297 host_attributes = site_host_attributes.HostAttributes(self.hostname)
298 if host_attributes.has_chromeos_firmware:
299 # Wait until tries == 0 and success, or until timeout.
300 utils.poll_for_condition(
301 lambda: (updater.get_kernel_tries(new_active_kernel) == 0
302 and updater.get_kernel_success(new_active_kernel)),
303 exception=autoupdater.ChromiumOSError(
304 'Update failed. Timed out waiting for system to mark'
305 ' new kernel as successful.'),
306 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
307
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700308 # Clean up any old autotest directories which may be lying around.
309 for path in global_config.global_config.get_config_value(
310 'AUTOSERV', 'client_autodir_paths', type=list):
311 self.run('rm -rf ' + path)
312
313
Richard Barnette82c35912012-11-20 10:09:10 -0800314 def _get_board_from_afe(self):
315 """Retrieve this host's board from its labels in the AFE.
316
317 Looks for a host label of the form "board:<board>", and
318 returns the "<board>" part of the label. `None` is returned
319 if there is not a single, unique label matching the pattern.
320
321 @returns board from label, or `None`.
322 """
323 host_model = models.Host.objects.get(hostname=self.hostname)
324 board_labels = filter(lambda l: l.name.startswith('board:'),
325 host_model.labels.all())
326 board_name = None
327 if len(board_labels) == 1:
328 board_name = board_labels[0].name.split(':', 1)[1]
329 elif len(board_labels) == 0:
330 logging.error('Host %s does not have a board label.',
331 self.hostname)
332 else:
333 logging.error('Host %s has multiple board labels.',
334 self.hostname)
335 return board_name
336
337
Richard Barnette03a0c132012-11-05 12:40:35 -0800338 def _servo_repair(self, board):
339 """Attempt to repair this host using an attached Servo.
340
341 Re-install the OS on the DUT by 1) installing a test image
342 on a USB storage device attached to the Servo board,
343 2) booting that image in recovery mode, and then
344 3) installing the image.
345
346 """
347 server = dev_server.ImageServer.devserver_url_for_servo(board)
348 image = server + (self._DEFAULT_SERVO_URL_FORMAT %
349 { 'board': board })
350 self.servo.install_recovery_image(image)
351 if not self.wait_up(timeout=self.USB_BOOT_TIMEOUT):
352 raise error.AutoservError('DUT failed to boot from USB'
353 ' after %d seconds' %
354 self.USB_BOOT_TIMEOUT)
355 self.run('chromeos-install --yes',
356 timeout=self._INSTALL_TIMEOUT)
357 self.servo.power_long_press()
358 self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
359 self.servo.power_short_press()
360 if not self.wait_up(timeout=self.BOOT_TIMEOUT):
361 raise error.AutoservError('DUT failed to reboot installed '
362 'test image after %d seconds' %
363 self.BOOT_TIMEOUT)
364
365
Richard Barnette82c35912012-11-20 10:09:10 -0800366 def _powercycle_to_repair(self):
367 """Utilize the RPM Infrastructure to bring the host back up.
368
369 If the host is not up/repaired after the first powercycle we utilize
370 auto fallback to the last good install by powercycling and rebooting the
371 host 6 times.
372 """
373 logging.info('Attempting repair via RPM powercycle.')
374 failed_cycles = 0
375 self.power_cycle()
376 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
377 failed_cycles += 1
378 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
379 raise error.AutoservError('Powercycled host %s %d times; '
380 'device did not come back online.' %
381 (self.hostname, failed_cycles))
382 self.power_cycle()
383 if failed_cycles == 0:
384 logging.info('Powercycling was successful first time.')
385 else:
386 logging.info('Powercycling was successful after %d failures.',
387 failed_cycles)
388
389
390 def repair_full(self):
391 """Repair a host for repair level NO_PROTECTION.
392
393 This overrides the base class function for repair; it does
394 not call back to the parent class, but instead offers a
395 simplified implementation based on the capabilities in the
396 Chrome OS test lab.
397
398 Repair follows this sequence:
399 1. If the DUT passes `self.verify()`, do nothing.
400 2. If the DUT can be power-cycled via RPM, try to repair
401 by power-cycling.
402
403 As with the parent method, the last operation performed on
404 the DUT must be to call `self.verify()`; if that call fails,
405 the exception it raises is passed back to the caller.
406 """
407 try:
408 self.verify()
409 except:
410 host_board = self._get_board_from_afe()
Richard Barnette03a0c132012-11-05 12:40:35 -0800411 if host_board is None:
412 logging.error('host %s has no board; failing repair',
413 self.hostname)
Richard Barnette82c35912012-11-20 10:09:10 -0800414 raise
Richard Barnette03a0c132012-11-05 12:40:35 -0800415 if (self.servo and
416 host_board in self._SERVO_REPAIR_WHITELIST):
417 self._servo_repair(host_board)
418 elif (self.has_power() and
419 host_board in self._RPM_RECOVERY_BOARDS):
420 self._powercycle_to_repair()
421 else:
422 logging.error('host %s has no servo and no RPM control; '
423 'failing repair', self.hostname)
424 raise
Richard Barnette82c35912012-11-20 10:09:10 -0800425 self.verify()
426
427
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700428 def close(self):
429 super(SiteHost, self).close()
430 self.xmlrpc_disconnect_all()
431
432
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700433 def cleanup(self):
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700434 client_at = autotest.Autotest(self)
Richard Barnette82c35912012-11-20 10:09:10 -0800435 self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500436 try:
437 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
438 '_clear_login_prompt_state')
439 self.run('restart ui')
440 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
441 '_wait_for_login_prompt')
Scott Zawalski2eed1122013-02-02 17:32:33 -0500442 except error.AutotestRunError:
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500443 logging.warn('Unable to restart ui, rebooting device.')
444 # Since restarting the UI fails fall back to normal Autotest
445 # cleanup routines, i.e. reboot the machine.
446 super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700447
448
Simran Basi154f5582012-10-23 16:27:11 -0700449 # TODO (sbasi) crosbug.com/35656
450 # Renamed the sitehost cleanup method so we don't go down this pathway.
451 # def cleanup(self):
452 def cleanup_poweron(self):
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700453 """Special cleanup method to make sure hosts always get power back."""
Chris Sosa9479fcd2012-10-09 13:44:22 -0700454 super(SiteHost, self).cleanup()
Simran Basid5e5e272012-09-24 15:23:59 -0700455 if self.has_power():
Simran Basifd23fb22012-10-22 17:56:22 -0700456 try:
457 self.power_on()
Chris Sosafab08082013-01-04 15:21:20 -0800458 except rpm_client.RemotePowerException:
Simran Basifd23fb22012-10-22 17:56:22 -0700459 # If cleanup has completed but there was an issue with the RPM
460 # Infrastructure, log an error message rather than fail cleanup
461 logging.error('Failed to turn Power On for this host after '
462 'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700463
464
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700465 def reboot(self, **dargs):
466 """
467 This function reboots the site host. The more generic
468 RemoteHost.reboot() performs sync and sleeps for 5
469 seconds. This is not necessary for Chrome OS devices as the
470 sync should be finished in a short time during the reboot
471 command.
472 """
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800473 if 'reboot_cmd' not in dargs:
474 dargs['reboot_cmd'] = ('((reboot & sleep 10; reboot -f &)'
475 ' </dev/null >/dev/null 2>&1 &)')
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700476 # Enable fastsync to avoid running extra sync commands before reboot.
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800477 if 'fastsync' not in dargs:
478 dargs['fastsync'] = True
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700479 super(SiteHost, self).reboot(**dargs)
480
481
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700482 def verify_software(self):
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800483 """Verify working software on a Chrome OS system.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700484
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800485 Tests for the following conditions:
486 1. All conditions tested by the parent version of this
487 function.
488 2. Sufficient space in /mnt/stateful_partition.
489 3. update_engine answers a simple status request over DBus.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700490
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700491 """
492 super(SiteHost, self).verify_software()
493 self.check_diskspace(
494 '/mnt/stateful_partition',
495 global_config.global_config.get_config_value(
496 'SERVER', 'gb_diskspace_required', type=int,
497 default=20))
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800498 self.run('update_engine_client --status')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700499
500
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800501 def xmlrpc_connect(self, command, port, command_name=None):
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700502 """Connect to an XMLRPC server on the host.
503
504 The `command` argument should be a simple shell command that
505 starts an XMLRPC server on the given `port`. The command
506 must not daemonize, and must terminate cleanly on SIGTERM.
507 The command is started in the background on the host, and a
508 local XMLRPC client for the server is created and returned
509 to the caller.
510
511 Note that the process of creating an XMLRPC client makes no
512 attempt to connect to the remote server; the caller is
513 responsible for determining whether the server is running
514 correctly, and is ready to serve requests.
515
516 @param command Shell command to start the server.
517 @param port Port number on which the server is expected to
518 be serving.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800519 @param command_name String to use as input to `pkill` to
520 terminate the XMLRPC server on the host.
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700521 """
522 self.xmlrpc_disconnect(port)
523
524 # Chrome OS on the target closes down most external ports
525 # for security. We could open the port, but doing that
526 # would conflict with security tests that check that only
527 # expected ports are open. So, to get to the port on the
528 # target we use an ssh tunnel.
529 local_port = utils.get_unused_port()
530 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
531 ssh_cmd = make_ssh_command(opts=tunnel_options)
532 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
533 logging.debug('Full tunnel command: %s', tunnel_cmd)
534 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
535 logging.debug('Started XMLRPC tunnel, local = %d'
536 ' remote = %d, pid = %d',
537 local_port, port, tunnel_proc.pid)
538
539 # Start the server on the host. Redirection in the command
540 # below is necessary, because 'ssh' won't terminate until
541 # background child processes close stdin, stdout, and
542 # stderr.
543 remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
544 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
545 logging.debug('Started XMLRPC server on host %s, pid = %s',
546 self.hostname, remote_pid)
547
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800548 self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700549 rpc_url = 'http://localhost:%d' % local_port
550 return xmlrpclib.ServerProxy(rpc_url, allow_none=True)
551
552
553 def xmlrpc_disconnect(self, port):
554 """Disconnect from an XMLRPC server on the host.
555
556 Terminates the remote XMLRPC server previously started for
557 the given `port`. Also closes the local ssh tunnel created
558 for the connection to the host. This function does not
559 directly alter the state of a previously returned XMLRPC
560 client object; however disconnection will cause all
561 subsequent calls to methods on the object to fail.
562
563 This function does nothing if requested to disconnect a port
564 that was not previously connected via `self.xmlrpc_connect()`
565
566 @param port Port number passed to a previous call to
567 `xmlrpc_connect()`
568 """
569 if port not in self._xmlrpc_proxy_map:
570 return
571 entry = self._xmlrpc_proxy_map[port]
572 remote_name = entry[0]
573 tunnel_proc = entry[1]
574 if remote_name:
575 # We use 'pkill' to find our target process rather than
576 # a PID, because the host may have rebooted since
577 # connecting, and we don't want to kill an innocent
578 # process with the same PID.
579 #
580 # 'pkill' helpfully exits with status 1 if no target
581 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -0700582 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700583 # status.
584 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
585
586 if tunnel_proc.poll() is None:
587 tunnel_proc.terminate()
588 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
589 else:
590 logging.debug('Tunnel pid %d terminated early, status %d',
591 tunnel_proc.pid, tunnel_proc.returncode)
592 del self._xmlrpc_proxy_map[port]
593
594
595 def xmlrpc_disconnect_all(self):
596 """Disconnect all known XMLRPC proxy ports."""
597 for port in self._xmlrpc_proxy_map.keys():
598 self.xmlrpc_disconnect(port)
599
600
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700601 def _ping_is_up(self):
602 """Ping the host once, and return whether it responded."""
603 return utils.ping(self.hostname, tries=1, deadline=1) == 0
604
605
Andrew Bresticker678c0c72013-01-22 10:44:09 -0800606 def ping_wait_down(self, timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700607 """Wait until the host no longer responds to `ping`.
608
609 @param timeout Minimum time to allow before declaring the
610 host to be non-responsive.
611 """
612
613 # This function is a slightly faster version of wait_down().
614 #
615 # In AbstractSSHHost.wait_down(), `ssh` is used to determine
616 # whether the host is down. In some situations (mine, at
617 # least), `ssh` can take over a minute to determine that the
618 # host is down. The `ping` command answers the question
619 # faster, so we use that here instead.
620 #
621 # There is no equivalent for wait_up(), because a target that
622 # answers to `ping` won't necessarily respond to `ssh`.
623 end_time = time.time() + timeout
624 while time.time() <= end_time:
625 if not self._ping_is_up():
626 return True
627
628 # If the timeout is short relative to the run time of
629 # _ping_is_up(), we might be prone to false failures for
630 # lack of checking frequently enough. To be safe, we make
631 # one last check _after_ the deadline.
632 return not self._ping_is_up()
633
634
def test_wait_for_sleep(self):
    """Block until the client stops answering `ping`, i.e. looks asleep.

    A sleeping unit and a powered-off unit look the same to this
    check.  To prove the unit really slept, force a resume
    afterwards and call `test_wait_for_resume()`.

    Intended call sequence inside a test:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        `SLEEP_TIMEOUT` seconds.
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    # Still pingable after the deadline: report the timeout we used.
    failure = 'client failed to sleep after %d seconds' % self.SLEEP_TIMEOUT
    raise error.TestFail(failure)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700661
662
def test_wait_for_resume(self, old_boot_id):
    """Block until the client is reachable again after sleeping.

    `old_boot_id` should be the `get_boot_id()` value captured
    before the host went to sleep.  If the boot id seen after the
    host comes back differs, the host rebooted instead of resuming
    and `TestFail` is raised.

    See @ref test_wait_for_sleep for the expected calling sequence.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within
                        `RESUME_TIMEOUT` seconds.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host is back; a changed boot id means it rebooted.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        boot_ids = (old_boot_id, current_boot_id)
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)' % boot_ids)
693
694
def test_wait_for_shutdown(self):
    """Block until the client stops answering `ping`, i.e. looks shut down.

    A unit that is merely asleep looks the same to this check.  To
    prove the unit really shut down, force a boot afterwards and
    call test_wait_for_boot().

    Intended call sequence inside a test:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within
                        `SHUTDOWN_TIMEOUT` seconds.
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    # Still pingable after the deadline: report the timeout we used.
    failure = ('client failed to shut down after %d seconds' %
               self.SHUTDOWN_TIMEOUT)
    raise error.TestFail(failure)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700720
721
def test_wait_for_boot(self, old_boot_id=None):
    """Block until the client is reachable again after a cold boot.

    `old_boot_id` should be the `get_boot_id()` value captured
    before shutting down.  If the host comes back with the same
    boot id, it never rebooted and `TestFail` is raised.  The boot
    id comparison is skipped when `old_boot_id` is not supplied
    (or is otherwise falsy).

    See @ref test_wait_for_shutdown for the expected calling
    sequence.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within
                        `REBOOT_TIMEOUT` seconds.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_up:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # Nothing to compare against: reachability alone counts as success.
    if not old_boot_id:
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700751
752
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname matches the RPM-powered naming format.

    The hostname is matched (anchored at its start) against
    `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname The hostname to test.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab. If it does follow the
            format, a regular expression MatchObject is returned instead.
    """
    rpm_pattern = SiteHost._RPM_HOSTNAME_REGEX
    match = re.match(rpm_pattern, hostname)
    return match
Simran Basid5e5e272012-09-24 15:23:59 -0700762
763
def has_power(self):
    """For this host, report whether it is powered by an RPM.

    Delegates to `SiteHost.check_for_rpm_support()` with this
    host's hostname and passes its result through unchanged.

    @return A (truthy) regular expression MatchObject if this host's
            name follows the defined RPM naming format, otherwise None.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
771
772
def power_off(self):
    """Ask the RPM client to set this host's power state to 'OFF'."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'OFF')
Simran Basid5e5e272012-09-24 15:23:59 -0700776
777
def power_on(self):
    """Ask the RPM client to set this host's power state to 'ON'."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'ON')
Simran Basid5e5e272012-09-24 15:23:59 -0700781
782
def power_cycle(self):
    """Ask the RPM client to power-cycle this host ('CYCLE': OFF, then ON)."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700786
787
def get_platform(self):
    """Derive this host's platform label from its firmware id.

    The fwid reported by crossystem generally looks like
    {platform}.{firmware version}, for example `Alex.X.YYY.Z` or
    `Google_Alex.X.YYY.Z`.  The part before the first '.' is
    lower-cased and any 'google_' text is removed from it.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    firmware_id = crossystem.fwid()
    # Everything ahead of the first '.' identifies the platform.
    platform_id = firmware_id.split('.')[0]
    lowered = platform_id.lower()
    # Newer platforms carry a 'Google_' prefix; older ones do not.
    return lowered.replace('google_', '')
801
802
@label_decorator()
def get_board(self):
    """Build the board label for this host from /etc/lsb-release.

    Reads CHROMEOS_RELEASE_BOARD from the host and trims it to the
    bare board name.

    @returns a string of the form 'board:<name>' representing this
             host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    raw_board = lsb_release['CHROMEOS_RELEASE_BOARD']
    # Lab devices usually report the plain board name, but development
    # devices report {board_name}-signed-{key_type}.  Boards containing
    # 'x86' (e.g. an 'x86-' prefix) need their first two dash-separated
    # pieces kept; every other board keeps only the first piece.
    pieces = raw_board.split('-')
    keep = 1 if 'x86' not in raw_board else 2
    return 'board:%s' % '-'.join(pieces[:keep])
818
819
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the 'lightsensor' label.

    Runs `find` (following symlinks, at most 4 levels deep) under
    `_LIGHTSENSOR_SEARCH_DIR` on the host and filters the result with
    `egrep` for any of the names in `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        search_dir, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
839
840
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the 'bluetooth' label.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # `test` exited with return code 1: the directory did not exist.
        return None
    # `test` exited with return code 0: the directory exists.
    return 'bluetooth'
856
857
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in `_LABEL_FUNCTIONS` is called once, in
    order, with this host as its argument, and each truthy result is
    collected.  Falsy results (such as None) are dropped.

    @returns a list of the labels produced for this host.
    """
    # The generator calls each label function exactly once, lazily, in
    # the order listed.
    candidates = (detect(self) for detect in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]