blob: 9e5838312c5985dc106f56a7b3e67b39d9a046fa [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
J. Richard Barnette1d78b012012-05-15 13:56:30 -07006import logging
Simran Basid5e5e272012-09-24 15:23:59 -07007import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07009import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070010import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070011
J. Richard Barnette45e93de2012-04-11 17:24:15 -070012from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080013from autotest_lib.client.common_lib import error
14from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080016from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette82c35912012-11-20 10:09:10 -080017from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070018from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070019from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070020from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070021from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070022from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080023from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070024
Richard Barnette82c35912012-11-20 10:09:10 -080025# Importing frontend.afe.models requires a full Autotest
26# installation (with the Django modules), not just the source
27# repository. Most developers won't have the full installation, so
28# the imports below will fail for them.
29#
30# The fix is to catch import exceptions, and set `models` to `None`
31# on failure. This has the side effect that
32# SiteHost._get_board_from_afe() will fail: That will manifest as
33# failures during Repair jobs leaving the DUT as "Repair Failed".
34# In practice, you can't test Repair jobs without a full
35# installation, so that kind of failure isn't expected.
36try:
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080037 # pylint: disable=W0611
Richard Barnette82c35912012-11-20 10:09:10 -080038 from autotest_lib.frontend import setup_django_environment
39 from autotest_lib.frontend.afe import models
40except:
41 models = None
42
Simran Basid5e5e272012-09-24 15:23:59 -070043
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080044def _make_servo_hostname(hostname):
45 host_parts = hostname.split('.')
46 host_parts[0] = host_parts[0] + '-servo'
47 return '.'.join(host_parts)
48
49
50def _get_lab_servo(target_hostname):
51 """Instantiate a Servo for |target_hostname| in the lab.
52
53 Assuming that |target_hostname| is a device in the CrOS test
54 lab, create and return a Servo object pointed at the servo
55 attached to that DUT. The servo in the test lab is assumed
56 to already have servod up and running on it.
57
58 @param target_hostname: device whose servo we want to target.
59 @return an appropriately configured Servo instance.
60 """
61 servo_host = _make_servo_hostname(target_hostname)
62 if utils.host_is_in_lab_zone(servo_host):
63 try:
J. Richard Barnetted5f807a2013-02-11 16:51:00 -080064 return servo.Servo(servo_host=servo_host)
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080065 except: # pylint: disable=W0702
66 # TODO(jrbarnette): Long-term, if we can't get to
67 # a servo in the lab, we want to fail, so we should
68 # pass any exceptions along. Short-term, we're not
69 # ready to rely on servo, so we ignore failures.
70 pass
71 return None
72
73
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070074def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
75 connect_timeout=None, alive_interval=None):
76 """Override default make_ssh_command to use options tuned for Chrome OS.
77
78 Tuning changes:
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070079 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH connection
80 failure. Consistency with remote_access.sh.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070081
Dale Curtisaa5eedb2011-08-23 16:18:52 -070082 - ServerAliveInterval=180; which causes SSH to ping connection every
83 180 seconds. In conjunction with ServerAliveCountMax ensures that if the
84 connection dies, Autotest will bail out quickly. Originally tried 60 secs,
85 but saw frequent job ABORTS where the test completed successfully.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070086
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070087 - ServerAliveCountMax=3; consistency with remote_access.sh.
88
89 - ConnectAttempts=4; reduce flakiness in connection errors; consistency
90 with remote_access.sh.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -070091
92 - UserKnownHostsFile=/dev/null; we don't care about the keys. Host keys
93 change with every new installation, don't waste memory/space saving them.
Chris Sosaf7fcd6e2011-09-27 17:30:47 -070094
95 - SSH protocol forced to 2; needed for ServerAliveInterval.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -080096
97 @param user User name to use for the ssh connection.
98 @param port Port on the target host to use for ssh connection.
99 @param opts Additional options to the ssh command.
100 @param hosts_file Ignored.
101 @param connect_timeout Ignored.
102 @param alive_interval Ignored.
Dale Curtiscb7bfaf2011-06-07 16:21:57 -0700103 """
104 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
105 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
Chris Sosaf7fcd6e2011-09-27 17:30:47 -0700106 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
107 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
108 ' -o Protocol=2 -l %s -p %d')
Dale Curtiscb7bfaf2011-06-07 16:21:57 -0700109 return base_command % (opts, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700110
111
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800112
Aviv Keshet74c89a92013-02-04 15:18:30 -0800113def add_label_detector(label_function_list, label_list=None, label=None):
114 """Decorator used to group functions together into the provided list.
115 @param label_function_list: List of label detecting functions to add
116 decorated function to.
117 @param label_list: List of detectable labels to add detectable labels to.
118 (Default: None)
119 @param label: Label string that is detectable by this detection function
120 (Default: None)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800121 """
Simran Basic6f1f7a2012-10-16 10:47:46 -0700122 def add_func(func):
Aviv Keshet74c89a92013-02-04 15:18:30 -0800123 """
124 @param func: The function to be added as a detector.
125 """
126 label_function_list.append(func)
127 if label and label_list is not None:
128 label_list.append(label)
Simran Basic6f1f7a2012-10-16 10:47:46 -0700129 return func
130 return add_func
131
132
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700133class SiteHost(remote.RemoteHost):
134 """Chromium OS specific subclass of Host."""
135
136 _parser = autoserv_parser.autoserv_parser
137
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800138 # Time to wait for new kernel to be marked successful after
139 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700140 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700141
Richard Barnette03a0c132012-11-05 12:40:35 -0800142 # Timeout values (in seconds) associated with various Chrome OS
143 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700144 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800145 # In general, a good rule of thumb is that the timeout can be up
146 # to twice the typical measured value on the slowest platform.
147 # The times here have not necessarily been empirically tested to
148 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700149 #
150 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800151 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
152 # time to restart the netwowrk.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700153 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800154 # other things, this must account for the 30 second dev-mode
155 # screen delay and time to start the network,
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700156 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800157 # including the 30 second dev-mode delay and time to start the
158 # network,
159 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700160 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
Richard Barnette03a0c132012-11-05 12:40:35 -0800161 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700162
163 SLEEP_TIMEOUT = 2
164 RESUME_TIMEOUT = 5
165 BOOT_TIMEOUT = 45
166 USB_BOOT_TIMEOUT = 150
167 SHUTDOWN_TIMEOUT = 5
168 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
Richard Barnette03a0c132012-11-05 12:40:35 -0800169 _INSTALL_TIMEOUT = 240
170
171 _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
172 '%(board)s_test_image.bin')
173
174 # TODO(jrbarnette): Servo repair is restricted to x86-alex,
175 # because the existing servo client code won't work on other
176 # boards. http://crosbug.com/36973
177 _SERVO_REPAIR_WHITELIST = [ 'x86-alex' ]
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800178
179
Richard Barnette82c35912012-11-20 10:09:10 -0800180 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
181 'rpm_recovery_boards', type=str).split(',')
182
183 _MAX_POWER_CYCLE_ATTEMPTS = 6
184 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
185 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
186 'host[0-9]+')
187 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
188 'in_illuminance0_raw',
189 'illuminance0_input']
190 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
191 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800192 _DETECTABLE_LABELS = []
193 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
194 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700195
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800196 # Constants used in ping_wait_up() and ping_wait_down().
197 #
198 # _PING_WAIT_COUNT is the approximate number of polling
199 # cycles to use when waiting for a host state change.
200 #
201 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
202 # for arguments to the internal _ping_wait_for_status()
203 # method.
204 _PING_WAIT_COUNT = 40
205 _PING_STATUS_DOWN = False
206 _PING_STATUS_UP = True
207
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800208
J. Richard Barnette964fba02012-10-24 17:34:29 -0700209 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800210 def get_servo_arguments(args_dict):
211 """Extract servo options from `args_dict` and return the result.
212
213 Take the provided dictionary of argument options and return
214 a subset that represent standard arguments needed to
215 construct a servo object for a host. The intent is to
216 provide standard argument processing from run_remote_tests
217 for tests that require a servo to operate.
218
219 Recommended usage:
220 ~~~~~~~~
221 args_dict = utils.args_to_dict(args)
222 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
223 host = hosts.create_host(machine, servo_args=servo_args)
224 ~~~~~~~~
225
226 @param args_dict Dictionary from which to extract the servo
227 arguments.
228 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700229 servo_args = {}
230 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800231 if arg in args_dict:
232 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700233 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700234
J. Richard Barnette964fba02012-10-24 17:34:29 -0700235
236 def _initialize(self, hostname, servo_args=None, *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700237 """Initialize superclasses, and |self.servo|.
238
239 For creating the host servo object, there are three
240 possibilities: First, if the host is a lab system known to
241 have a servo board, we connect to that servo unconditionally.
242 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700243 servo features for testing, it will pass settings for
244 `servo_host`, `servo_port`, or both. If neither of these
245 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700246
247 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700248 super(SiteHost, self)._initialize(hostname=hostname,
249 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700250 # self.env is a dictionary of environment variable settings
251 # to be exported for commands run on the host.
252 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
253 # errors that might happen.
254 self.env['LIBC_FATAL_STDERR_'] = '1'
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700255 self._xmlrpc_proxy_map = {}
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -0800256 self.servo = _get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700257 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700258 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700259
260
Chris Sosaa3ac2152012-05-23 22:23:13 -0700261 def machine_install(self, update_url=None, force_update=False,
262 local_devserver=False):
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700263 if not update_url and self._parser.options.image:
264 update_url = self._parser.options.image
265 elif not update_url:
266 raise autoupdater.ChromiumOSError(
267 'Update failed. No update URL provided.')
268
Chris Sosafab08082013-01-04 15:21:20 -0800269 # In case the system is in a bad state, we always reboot the machine
270 # before machine_install.
271 self.reboot(timeout=60, wait=True)
272
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700273 # Attempt to update the system.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700274 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
275 local_devserver=local_devserver)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700276 if updater.run_update(force_update):
277 # Figure out active and inactive kernel.
278 active_kernel, inactive_kernel = updater.get_kernel_state()
279
280 # Ensure inactive kernel has higher priority than active.
281 if (updater.get_kernel_priority(inactive_kernel)
282 < updater.get_kernel_priority(active_kernel)):
283 raise autoupdater.ChromiumOSError(
284 'Update failed. The priority of the inactive kernel'
285 ' partition is less than that of the active kernel'
286 ' partition.')
287
Scott Zawalski21902002012-09-19 17:57:00 -0400288 update_engine_log = '/var/log/update_engine.log'
289 logging.info('Dumping %s', update_engine_log)
290 self.run('cat %s' % update_engine_log)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800291 # Updater has returned successfully; reboot the host.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700292 self.reboot(timeout=60, wait=True)
Chris Sosae146ed82012-09-19 17:58:36 -0700293 # Touch the lab machine file to leave a marker that distinguishes
294 # this image from other test images.
Richard Barnette82c35912012-11-20 10:09:10 -0800295 self.run('touch %s' % self._LAB_MACHINE_FILE)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700296
297 # Following the reboot, verify the correct version.
298 updater.check_version()
299
300 # Figure out newly active kernel.
301 new_active_kernel, _ = updater.get_kernel_state()
302
303 # Ensure that previously inactive kernel is now the active kernel.
304 if new_active_kernel != inactive_kernel:
305 raise autoupdater.ChromiumOSError(
306 'Update failed. New kernel partition is not active after'
307 ' boot.')
308
309 host_attributes = site_host_attributes.HostAttributes(self.hostname)
310 if host_attributes.has_chromeos_firmware:
311 # Wait until tries == 0 and success, or until timeout.
312 utils.poll_for_condition(
313 lambda: (updater.get_kernel_tries(new_active_kernel) == 0
314 and updater.get_kernel_success(new_active_kernel)),
315 exception=autoupdater.ChromiumOSError(
316 'Update failed. Timed out waiting for system to mark'
317 ' new kernel as successful.'),
318 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
319
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700320 # Clean up any old autotest directories which may be lying around.
321 for path in global_config.global_config.get_config_value(
322 'AUTOSERV', 'client_autodir_paths', type=list):
323 self.run('rm -rf ' + path)
324
325
Richard Barnette82c35912012-11-20 10:09:10 -0800326 def _get_board_from_afe(self):
327 """Retrieve this host's board from its labels in the AFE.
328
329 Looks for a host label of the form "board:<board>", and
330 returns the "<board>" part of the label. `None` is returned
331 if there is not a single, unique label matching the pattern.
332
333 @returns board from label, or `None`.
334 """
335 host_model = models.Host.objects.get(hostname=self.hostname)
336 board_labels = filter(lambda l: l.name.startswith('board:'),
337 host_model.labels.all())
338 board_name = None
339 if len(board_labels) == 1:
340 board_name = board_labels[0].name.split(':', 1)[1]
341 elif len(board_labels) == 0:
342 logging.error('Host %s does not have a board label.',
343 self.hostname)
344 else:
345 logging.error('Host %s has multiple board labels.',
346 self.hostname)
347 return board_name
348
349
Richard Barnette03a0c132012-11-05 12:40:35 -0800350 def _servo_repair(self, board):
351 """Attempt to repair this host using an attached Servo.
352
353 Re-install the OS on the DUT by 1) installing a test image
354 on a USB storage device attached to the Servo board,
355 2) booting that image in recovery mode, and then
356 3) installing the image.
357
358 """
359 server = dev_server.ImageServer.devserver_url_for_servo(board)
360 image = server + (self._DEFAULT_SERVO_URL_FORMAT %
361 { 'board': board })
362 self.servo.install_recovery_image(image)
363 if not self.wait_up(timeout=self.USB_BOOT_TIMEOUT):
364 raise error.AutoservError('DUT failed to boot from USB'
365 ' after %d seconds' %
366 self.USB_BOOT_TIMEOUT)
367 self.run('chromeos-install --yes',
368 timeout=self._INSTALL_TIMEOUT)
369 self.servo.power_long_press()
370 self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
371 self.servo.power_short_press()
372 if not self.wait_up(timeout=self.BOOT_TIMEOUT):
373 raise error.AutoservError('DUT failed to reboot installed '
374 'test image after %d seconds' %
375 self.BOOT_TIMEOUT)
376
377
Richard Barnette82c35912012-11-20 10:09:10 -0800378 def _powercycle_to_repair(self):
379 """Utilize the RPM Infrastructure to bring the host back up.
380
381 If the host is not up/repaired after the first powercycle we utilize
382 auto fallback to the last good install by powercycling and rebooting the
383 host 6 times.
384 """
385 logging.info('Attempting repair via RPM powercycle.')
386 failed_cycles = 0
387 self.power_cycle()
388 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
389 failed_cycles += 1
390 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
391 raise error.AutoservError('Powercycled host %s %d times; '
392 'device did not come back online.' %
393 (self.hostname, failed_cycles))
394 self.power_cycle()
395 if failed_cycles == 0:
396 logging.info('Powercycling was successful first time.')
397 else:
398 logging.info('Powercycling was successful after %d failures.',
399 failed_cycles)
400
401
402 def repair_full(self):
403 """Repair a host for repair level NO_PROTECTION.
404
405 This overrides the base class function for repair; it does
406 not call back to the parent class, but instead offers a
407 simplified implementation based on the capabilities in the
408 Chrome OS test lab.
409
410 Repair follows this sequence:
411 1. If the DUT passes `self.verify()`, do nothing.
412 2. If the DUT can be power-cycled via RPM, try to repair
413 by power-cycling.
414
415 As with the parent method, the last operation performed on
416 the DUT must be to call `self.verify()`; if that call fails,
417 the exception it raises is passed back to the caller.
418 """
419 try:
420 self.verify()
421 except:
422 host_board = self._get_board_from_afe()
Richard Barnette03a0c132012-11-05 12:40:35 -0800423 if host_board is None:
424 logging.error('host %s has no board; failing repair',
425 self.hostname)
Richard Barnette82c35912012-11-20 10:09:10 -0800426 raise
Richard Barnette03a0c132012-11-05 12:40:35 -0800427 if (self.servo and
428 host_board in self._SERVO_REPAIR_WHITELIST):
429 self._servo_repair(host_board)
430 elif (self.has_power() and
431 host_board in self._RPM_RECOVERY_BOARDS):
432 self._powercycle_to_repair()
433 else:
434 logging.error('host %s has no servo and no RPM control; '
435 'failing repair', self.hostname)
436 raise
Richard Barnette82c35912012-11-20 10:09:10 -0800437 self.verify()
438
439
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700440 def close(self):
441 super(SiteHost, self).close()
442 self.xmlrpc_disconnect_all()
443
444
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700445 def cleanup(self):
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700446 client_at = autotest.Autotest(self)
Richard Barnette82c35912012-11-20 10:09:10 -0800447 self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500448 try:
449 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
450 '_clear_login_prompt_state')
451 self.run('restart ui')
452 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
453 '_wait_for_login_prompt')
Alex Millerf4517962013-02-25 15:03:02 -0800454 except (error.AutotestRunError, error.AutoservRunError):
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500455 logging.warn('Unable to restart ui, rebooting device.')
456 # Since restarting the UI fails fall back to normal Autotest
457 # cleanup routines, i.e. reboot the machine.
458 super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700459
460
Simran Basi154f5582012-10-23 16:27:11 -0700461 # TODO (sbasi) crosbug.com/35656
462 # Renamed the sitehost cleanup method so we don't go down this pathway.
463 # def cleanup(self):
464 def cleanup_poweron(self):
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700465 """Special cleanup method to make sure hosts always get power back."""
Chris Sosa9479fcd2012-10-09 13:44:22 -0700466 super(SiteHost, self).cleanup()
Simran Basid5e5e272012-09-24 15:23:59 -0700467 if self.has_power():
Simran Basifd23fb22012-10-22 17:56:22 -0700468 try:
469 self.power_on()
Chris Sosafab08082013-01-04 15:21:20 -0800470 except rpm_client.RemotePowerException:
Simran Basifd23fb22012-10-22 17:56:22 -0700471 # If cleanup has completed but there was an issue with the RPM
472 # Infrastructure, log an error message rather than fail cleanup
473 logging.error('Failed to turn Power On for this host after '
474 'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700475
476
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700477 def reboot(self, **dargs):
478 """
479 This function reboots the site host. The more generic
480 RemoteHost.reboot() performs sync and sleeps for 5
481 seconds. This is not necessary for Chrome OS devices as the
482 sync should be finished in a short time during the reboot
483 command.
484 """
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800485 if 'reboot_cmd' not in dargs:
486 dargs['reboot_cmd'] = ('((reboot & sleep 10; reboot -f &)'
487 ' </dev/null >/dev/null 2>&1 &)')
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700488 # Enable fastsync to avoid running extra sync commands before reboot.
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800489 if 'fastsync' not in dargs:
490 dargs['fastsync'] = True
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700491 super(SiteHost, self).reboot(**dargs)
492
493
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700494 def verify_software(self):
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800495 """Verify working software on a Chrome OS system.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700496
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800497 Tests for the following conditions:
498 1. All conditions tested by the parent version of this
499 function.
500 2. Sufficient space in /mnt/stateful_partition.
501 3. update_engine answers a simple status request over DBus.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700502
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700503 """
504 super(SiteHost, self).verify_software()
505 self.check_diskspace(
506 '/mnt/stateful_partition',
507 global_config.global_config.get_config_value(
508 'SERVER', 'gb_diskspace_required', type=int,
509 default=20))
Richard Barnetteb2bc13c2013-01-08 17:32:51 -0800510 self.run('update_engine_client --status')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700511
512
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800513 def xmlrpc_connect(self, command, port, command_name=None):
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700514 """Connect to an XMLRPC server on the host.
515
516 The `command` argument should be a simple shell command that
517 starts an XMLRPC server on the given `port`. The command
518 must not daemonize, and must terminate cleanly on SIGTERM.
519 The command is started in the background on the host, and a
520 local XMLRPC client for the server is created and returned
521 to the caller.
522
523 Note that the process of creating an XMLRPC client makes no
524 attempt to connect to the remote server; the caller is
525 responsible for determining whether the server is running
526 correctly, and is ready to serve requests.
527
528 @param command Shell command to start the server.
529 @param port Port number on which the server is expected to
530 be serving.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800531 @param command_name String to use as input to `pkill` to
532 terminate the XMLRPC server on the host.
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700533 """
534 self.xmlrpc_disconnect(port)
535
536 # Chrome OS on the target closes down most external ports
537 # for security. We could open the port, but doing that
538 # would conflict with security tests that check that only
539 # expected ports are open. So, to get to the port on the
540 # target we use an ssh tunnel.
541 local_port = utils.get_unused_port()
542 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
543 ssh_cmd = make_ssh_command(opts=tunnel_options)
544 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
545 logging.debug('Full tunnel command: %s', tunnel_cmd)
546 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
547 logging.debug('Started XMLRPC tunnel, local = %d'
548 ' remote = %d, pid = %d',
549 local_port, port, tunnel_proc.pid)
550
551 # Start the server on the host. Redirection in the command
552 # below is necessary, because 'ssh' won't terminate until
553 # background child processes close stdin, stdout, and
554 # stderr.
555 remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
556 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
557 logging.debug('Started XMLRPC server on host %s, pid = %s',
558 self.hostname, remote_pid)
559
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800560 self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700561 rpc_url = 'http://localhost:%d' % local_port
562 return xmlrpclib.ServerProxy(rpc_url, allow_none=True)
563
564
565 def xmlrpc_disconnect(self, port):
566 """Disconnect from an XMLRPC server on the host.
567
568 Terminates the remote XMLRPC server previously started for
569 the given `port`. Also closes the local ssh tunnel created
570 for the connection to the host. This function does not
571 directly alter the state of a previously returned XMLRPC
572 client object; however disconnection will cause all
573 subsequent calls to methods on the object to fail.
574
575 This function does nothing if requested to disconnect a port
576 that was not previously connected via `self.xmlrpc_connect()`
577
578 @param port Port number passed to a previous call to
579 `xmlrpc_connect()`
580 """
581 if port not in self._xmlrpc_proxy_map:
582 return
583 entry = self._xmlrpc_proxy_map[port]
584 remote_name = entry[0]
585 tunnel_proc = entry[1]
586 if remote_name:
587 # We use 'pkill' to find our target process rather than
588 # a PID, because the host may have rebooted since
589 # connecting, and we don't want to kill an innocent
590 # process with the same PID.
591 #
592 # 'pkill' helpfully exits with status 1 if no target
593 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -0700594 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700595 # status.
596 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
597
598 if tunnel_proc.poll() is None:
599 tunnel_proc.terminate()
600 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
601 else:
602 logging.debug('Tunnel pid %d terminated early, status %d',
603 tunnel_proc.pid, tunnel_proc.returncode)
604 del self._xmlrpc_proxy_map[port]
605
606
607 def xmlrpc_disconnect_all(self):
608 """Disconnect all known XMLRPC proxy ports."""
609 for port in self._xmlrpc_proxy_map.keys():
610 self.xmlrpc_disconnect(port)
611
612
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800613 def _ping_check_status(self, status):
614 """Ping the host once, and return whether it has a given status.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700615
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800616 @param status Check the ping status against this value.
617 @return True iff `status` and the result of ping are the same
618 (i.e. both True or both False).
619
620 """
621 ping_val = utils.ping(self.hostname, tries=1, deadline=1)
622 return not (status ^ (ping_val == 0))
623
624 def _ping_wait_for_status(self, status, timeout):
625 """Wait for the host to have a given status (UP or DOWN).
626
627 Status is checked by polling. Polling will not last longer
628 than the number of seconds in `timeout`. The polling
629 interval will be long enough that only approximately
630 _PING_WAIT_COUNT polling cycles will be executed, subject
631 to a maximum interval of about one minute.
632
633 @param status Waiting will stop immediately if `ping` of the
634 host returns this status.
635 @param timeout Poll for at most this many seconds.
636 @return True iff the host status from `ping` matched the
637 requested status at the time of return.
638
639 """
640 # _ping_check_status() takes about 1 second, hence the
641 # "- 1" in the formula below.
642 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
643 end_time = time.time() + timeout
644 while time.time() <= end_time:
645 if self._ping_check_status(status):
646 return True
647 if poll_interval > 0:
648 time.sleep(poll_interval)
649
650 # The last thing we did was sleep(poll_interval), so it may
651 # have been too long since the last `ping`. Check one more
652 # time, just to be sure.
653 return self._ping_check_status(status)
654
655 def ping_wait_up(self, timeout):
656 """Wait for the host to respond to `ping`.
657
658 N.B. This method is not a reliable substitute for
659 `wait_up()`, because a host that responds to ping will not
660 necessarily respond to ssh. This method should only be used
661 if the target DUT can be considered functional even if it
662 can't be reached via ssh.
663
664 @param timeout Minimum time to allow before declaring the
665 host to be non-responsive.
666 @return True iff the host answered to ping before the timeout.
667
668 """
669 return self._ping_wait_for_status(self._PING_STATUS_UP, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700670
Andrew Bresticker678c0c72013-01-22 10:44:09 -0800671 def ping_wait_down(self, timeout):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700672 """Wait until the host no longer responds to `ping`.
673
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800674 This function can be used as a slightly faster version of
675 `wait_down()`, by avoiding potentially long ssh timeouts.
676
677 @param timeout Minimum time to allow for the host to become
678 non-responsive.
679 @return True iff the host quit answering ping before the
680 timeout.
681
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700682 """
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800683 return self._ping_wait_for_status(self._PING_STATUS_DOWN, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700684
685 def test_wait_for_sleep(self):
686 """Wait for the client to enter low-power sleep mode.
687
688 The test for "is asleep" can't distinguish a system that is
689 powered off; to confirm that the unit was asleep, it is
690 necessary to force resume, and then call
691 `test_wait_for_resume()`.
692
693 This function is expected to be called from a test as part
694 of a sequence like the following:
695
696 ~~~~~~~~
697 boot_id = host.get_boot_id()
698 # trigger sleep on the host
699 host.test_wait_for_sleep()
700 # trigger resume on the host
701 host.test_wait_for_resume(boot_id)
702 ~~~~~~~~
703
704 @exception TestFail The host did not go to sleep within
705 the allowed time.
706 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -0800707 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700708 raise error.TestFail(
709 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700710 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700711
712
713 def test_wait_for_resume(self, old_boot_id):
714 """Wait for the client to resume from low-power sleep mode.
715
716 The `old_boot_id` parameter should be the value from
717 `get_boot_id()` obtained prior to entering sleep mode. A
718 `TestFail` exception is raised if the boot id changes.
719
720 See @ref test_wait_for_sleep for more on this function's
721 usage.
722
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800723 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700724 target host went to sleep.
725
726 @exception TestFail The host did not respond within the
727 allowed time.
728 @exception TestFail The host responded, but the boot id test
729 indicated a reboot rather than a sleep
730 cycle.
731 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700732 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700733 raise error.TestFail(
734 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700735 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700736 else:
737 new_boot_id = self.get_boot_id()
738 if new_boot_id != old_boot_id:
739 raise error.TestFail(
740 'client rebooted, but sleep was expected'
741 ' (old boot %s, new boot %s)'
742 % (old_boot_id, new_boot_id))
743
744
745 def test_wait_for_shutdown(self):
746 """Wait for the client to shut down.
747
748 The test for "has shut down" can't distinguish a system that
749 is merely asleep; to confirm that the unit was down, it is
750 necessary to force boot, and then call test_wait_for_boot().
751
752 This function is expected to be called from a test as part
753 of a sequence like the following:
754
755 ~~~~~~~~
756 boot_id = host.get_boot_id()
757 # trigger shutdown on the host
758 host.test_wait_for_shutdown()
759 # trigger boot on the host
760 host.test_wait_for_boot(boot_id)
761 ~~~~~~~~
762
763 @exception TestFail The host did not shut down within the
764 allowed time.
765 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -0800766 if not self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700767 raise error.TestFail(
768 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700769 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700770
771
772 def test_wait_for_boot(self, old_boot_id=None):
773 """Wait for the client to boot from cold power.
774
775 The `old_boot_id` parameter should be the value from
776 `get_boot_id()` obtained prior to shutting down. A
777 `TestFail` exception is raised if the boot id does not
778 change. The boot id test is omitted if `old_boot_id` is not
779 specified.
780
781 See @ref test_wait_for_shutdown for more on this function's
782 usage.
783
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800784 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700785 shut down.
786
787 @exception TestFail The host did not respond within the
788 allowed time.
789 @exception TestFail The host responded, but the boot id test
790 indicated that there was no reboot.
791 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700792 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700793 raise error.TestFail(
794 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700795 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700796 elif old_boot_id:
797 if self.get_boot_id() == old_boot_id:
798 raise error.TestFail(
799 'client is back up, but did not reboot'
800 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700801
802
803 @staticmethod
804 def check_for_rpm_support(hostname):
805 """For a given hostname, return whether or not it is powered by an RPM.
806
807 @return None if this host does not follows the defined naming format
808 for RPM powered DUT's in the lab. If it does follow the format,
809 it returns a regular expression MatchObject instead.
810 """
Richard Barnette82c35912012-11-20 10:09:10 -0800811 return re.match(SiteHost._RPM_HOSTNAME_REGEX, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -0700812
813
814 def has_power(self):
815 """For this host, return whether or not it is powered by an RPM.
816
817 @return True if this host is in the CROS lab and follows the defined
818 naming format.
819 """
820 return SiteHost.check_for_rpm_support(self.hostname)
821
822
Simran Basid5e5e272012-09-24 15:23:59 -0700823 def power_off(self):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800824 """Turn off power to this host via RPM."""
Simran Basidcff4252012-11-20 16:13:20 -0800825 rpm_client.set_power(self.hostname, 'OFF')
Simran Basid5e5e272012-09-24 15:23:59 -0700826
827
828 def power_on(self):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800829 """Turn on power to this host via RPM."""
Simran Basidcff4252012-11-20 16:13:20 -0800830 rpm_client.set_power(self.hostname, 'ON')
Simran Basid5e5e272012-09-24 15:23:59 -0700831
832
833 def power_cycle(self):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800834 """Cycle power to this host by turning it OFF, then ON."""
Simran Basidcff4252012-11-20 16:13:20 -0800835 rpm_client.set_power(self.hostname, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700836
837
838 def get_platform(self):
839 """Determine the correct platform label for this host.
840
841 @returns a string representing this host's platform.
842 """
843 crossystem = utils.Crossystem(self)
844 crossystem.init()
845 # Extract fwid value and use the leading part as the platform id.
846 # fwid generally follow the format of {platform}.{firmware version}
847 # Example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
848 platform = crossystem.fwid().split('.')[0].lower()
849 # Newer platforms start with 'Google_' while the older ones do not.
850 return platform.replace('google_', '')
851
852
Aviv Keshet74c89a92013-02-04 15:18:30 -0800853 @label_decorator()
Simran Basic6f1f7a2012-10-16 10:47:46 -0700854 def get_board(self):
855 """Determine the correct board label for this host.
856
857 @returns a string representing this host's board.
858 """
859 release_info = utils.parse_cmd_output('cat /etc/lsb-release',
860 run_method=self.run)
861 board = release_info['CHROMEOS_RELEASE_BOARD']
862 # Devices in the lab generally have the correct board name but our own
863 # development devices have {board_name}-signed-{key_type}. The board
864 # name may also begin with 'x86-' which we need to keep.
865 if 'x86' not in board:
866 return 'board:%s' % board.split('-')[0]
867 return 'board:%s' % '-'.join(board.split('-')[0:2])
868
869
Aviv Keshet74c89a92013-02-04 15:18:30 -0800870 @label_decorator('lightsensor')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700871 def has_lightsensor(self):
872 """Determine the correct board label for this host.
873
874 @returns the string 'lightsensor' if this host has a lightsensor or
875 None if it does not.
876 """
877 search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
Richard Barnette82c35912012-11-20 10:09:10 -0800878 self._LIGHTSENSOR_SEARCH_DIR, '|'.join(self._LIGHTSENSOR_FILES))
Simran Basic6f1f7a2012-10-16 10:47:46 -0700879 try:
880 # Run the search cmd following the symlinks. Stderr_tee is set to
881 # None as there can be a symlink loop, but the command will still
882 # execute correctly with a few messages printed to stderr.
883 self.run(search_cmd, stdout_tee=None, stderr_tee=None)
884 return 'lightsensor'
885 except error.AutoservRunError:
886 # egrep exited with a return code of 1 meaning none of the possible
887 # lightsensor files existed.
888 return None
889
890
Aviv Keshet74c89a92013-02-04 15:18:30 -0800891 @label_decorator('bluetooth')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700892 def has_bluetooth(self):
893 """Determine the correct board label for this host.
894
895 @returns the string 'bluetooth' if this host has bluetooth or
896 None if it does not.
897 """
898 try:
899 self.run('test -d /sys/class/bluetooth/hci0')
900 # test exited with a return code of 0.
901 return 'bluetooth'
902 except error.AutoservRunError:
903 # test exited with a return code 1 meaning the directory did not
904 # exist.
905 return None
906
907
908 def get_labels(self):
909 """Return a list of labels for this given host.
910
911 This is the main way to retrieve all the automatic labels for a host
912 as it will run through all the currently implemented label functions.
913 """
914 labels = []
Richard Barnette82c35912012-11-20 10:09:10 -0800915 for label_function in self._LABEL_FUNCTIONS:
Simran Basic6f1f7a2012-10-16 10:47:46 -0700916 label = label_function(self)
917 if label:
918 labels.append(label)
919 return labels