blob: be7c022843a60e24d57b88377c555c826fe4502b [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
J. Richard Barnette1d78b012012-05-15 13:56:30 -07005import logging
Simran Basid5e5e272012-09-24 15:23:59 -07006import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07008import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -07009import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070010
J. Richard Barnette45e93de2012-04-11 17:24:15 -070011from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080012from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080015from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette82c35912012-11-20 10:09:10 -080016from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070018from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070020from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080022from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070023
# Importing frontend.afe.models requires a full Autotest
# installation (with the Django modules), not just the source
# repository.  Most developers won't have the full installation, so
# the imports below will fail for them.
#
# The fix is to catch exceptions raised by the imports, and set
# `models` to `None` on failure.  This has the side effect that
# SiteHost._get_board_from_afe() will fail:  That will manifest as
# failures during Repair jobs leaving the DUT as "Repair Failed".
# In practice, you can't test Repair jobs without a full
# installation, so that kind of failure isn't expected.
#
# `Exception` is caught rather than only `ImportError`, because
# setting up the Django environment may fail in other ways; a bare
# `except:` is avoided so that KeyboardInterrupt and SystemExit are
# not silently swallowed at import time.
try:
    # pylint: disable=W0611
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:  # pylint: disable=W0703
    models = None
41
Simran Basid5e5e272012-09-24 15:23:59 -070042
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to reach Chrome OS hosts.

    This replaces the default make_ssh_command with a fixed set of
    options tuned for Chrome OS:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null;
        we don't care about the keys.  Host keys change with every
        new installation, so don't waste memory/space saving them.

      - BatchMode=yes.

      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping the
        connection every 180 seconds.  In conjunction with
        ServerAliveCountMax this ensures that if the connection
        dies, Autotest will bail out quickly.  Originally tried 60
        secs, but saw frequent job ABORTS where the test completed
        successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command line, as a string, without a target
             host name.
    """
    fixed_settings = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_args = ' '.join(['-o %s' % setting for setting in fixed_settings])
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_args, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070079
80
def add_function_to_list(functions_list):
    """Return a decorator that records functions in `functions_list`.

    The decorated function is appended to the list and is otherwise
    returned unchanged.

    @param functions_list list to which the decorated function will
                          be added.
    """
    def _register(decorated):
        """Append `decorated` to the list and hand it back as-is."""
        functions_list.append(decorated)
        return decorated

    return _register
92
93
J. Richard Barnette45e93de2012-04-11 17:24:15 -070094class SiteHost(remote.RemoteHost):
95 """Chromium OS specific subclass of Host."""
96
# Shared autoserv command line parser; machine_install() reads
# `options.image` from it when no update URL is passed in.
_parser = autoserv_parser.autoserv_parser

# Time (in seconds) to wait for new kernel to be marked successful
# after auto update.
_KERNEL_UPDATE_TIMEOUT = 120

# Timeout values (in seconds) associated with various Chrome OS
# state changes.
#
# In general, a good rule of thumb is that the timeout can be up
# to twice the typical measured value on the slowest platform.
# The times here have not necessarily been empirically tested to
# meet this criterion.
#
# SLEEP_TIMEOUT:  Time to allow for suspend to memory.
# RESUME_TIMEOUT: Time to allow for resume after suspend, plus
#   time to restart the network.
# BOOT_TIMEOUT: Time to allow for boot from power off.  Among
#   other things, this must account for the 30 second dev-mode
#   screen delay and time to start the network.
# USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
#   including the 30 second dev-mode delay and time to start the
#   network.
# SHUTDOWN_TIMEOUT: Time to allow for shut down.
# REBOOT_TIMEOUT: Combination of shutdown and reboot times.
# _INSTALL_TIMEOUT: Time to allow for chromeos-install.

SLEEP_TIMEOUT = 2
RESUME_TIMEOUT = 5
BOOT_TIMEOUT = 45
USB_BOOT_TIMEOUT = 150
SHUTDOWN_TIMEOUT = 5
REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
_INSTALL_TIMEOUT = 240

# Path appended to the devserver URL to locate the test image that
# _servo_repair() installs; `%(board)s` is filled in with the
# board name.
_DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
                             '%(board)s_test_image.bin')

# TODO(jrbarnette):  Servo repair is restricted to x86-alex,
# because the existing servo client code won't work on other
# boards.  http://crosbug.com/36973
_SERVO_REPAIR_WHITELIST = [ 'x86-alex' ]


# Boards for which repair_full() will attempt repair by RPM power
# cycling; read as a comma separated list from the global config.
_RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
        'rpm_recovery_boards', type=str).split(',')

# Number of failed boots after which _powercycle_to_repair() gives
# up.
_MAX_POWER_CYCLE_ATTEMPTS = 6
# Marker file touched by machine_install() after an update, to
# distinguish a lab image from other test images.
_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
# NOTE(review): the remaining constants are not used by the code
# visible in this part of the file; the descriptions below are
# inferred from their names and values and should be confirmed
# against their users.
# Presumably the host name pattern for lab machines that have RPM
# (remote power management) support.
_RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                       'host[0-9]+')
# Presumably file names, and the directory to search for them, used
# to detect a light sensor on the DUT.
_LIGHTSENSOR_FILES = ['in_illuminance0_input',
                      'in_illuminance0_raw',
                      'illuminance0_input']
_LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
# Presumably populated via the add_function_to_list() decorator with
# functions that compute host labels.
_LABEL_FUNCTIONS = []
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700153
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800154
J. Richard Barnette964fba02012-10-24 17:34:29 -0700155 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800156 def get_servo_arguments(args_dict):
157 """Extract servo options from `args_dict` and return the result.
158
159 Take the provided dictionary of argument options and return
160 a subset that represent standard arguments needed to
161 construct a servo object for a host. The intent is to
162 provide standard argument processing from run_remote_tests
163 for tests that require a servo to operate.
164
165 Recommended usage:
166 ~~~~~~~~
167 args_dict = utils.args_to_dict(args)
168 servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
169 host = hosts.create_host(machine, servo_args=servo_args)
170 ~~~~~~~~
171
172 @param args_dict Dictionary from which to extract the servo
173 arguments.
174 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700175 servo_args = {}
176 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800177 if arg in args_dict:
178 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700179 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700180
J. Richard Barnette964fba02012-10-24 17:34:29 -0700181
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    There are three possibilities for the host servo object:
      1. The host is a lab system known to have a servo board; we
         connect to that servo unconditionally.
      2. Otherwise, if we're called from a control file that
         requires servo features for testing, it will pass
         settings for `servo_host`, `servo_port`, or both in
         `servo_args`, and a servo is created from those.
      3. If neither case applies, `self.servo` is whatever the lab
         servo lookup returned (expected to be `None`).

    @param hostname Name of the host; also used for the lab servo
                    lookup.
    @param servo_args Keyword arguments for constructing a
                      `servo.Servo`, or `None`.
    @param args Positional arguments passed on to the superclass.
    @param dargs Keyword arguments passed on to the superclass.
    """
    super(SiteHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'

    # Maps a remote port to the (command name, ssh tunnel process)
    # pair recorded by xmlrpc_connect().
    self._xmlrpc_proxy_map = {}

    self.servo = servo.Servo.get_lab_servo(hostname)
    need_caller_servo = not self.servo and servo_args is not None
    if need_caller_servo:
        self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700205
206
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False):
    """Update the host via auto update, and check that it worked.

    The host is rebooted before the update is attempted.  If the
    updater reports that an update ran, the host is rebooted again,
    and the new version and the newly active kernel are checked.
    Old autotest directories on the host are removed at the end.

    @param update_url URL of the update to install.  If not
                      given, the image from the autoserv command
                      line options is used.
    @param force_update Passed to the updater's `run_update()`.
    @param local_devserver Passed to the `ChromiumOSUpdater`
                           constructor.

    @exception ChromiumOSError No update URL is available, or one
                               of the post-update checks failed.
    """
    update_url = update_url or self._parser.options.image
    if not update_url:
        raise autoupdater.ChromiumOSError(
            'Update failed. No update URL provided.')

    # In case the system is in a bad state, we always reboot the machine
    # before machine_install.
    self.reboot(timeout=60, wait=True)

    # Attempt to update the system.
    updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    if updater.run_update(force_update):
        # Figure out active and inactive kernel.
        active_kernel, inactive_kernel = updater.get_kernel_state()

        # Ensure inactive kernel has higher priority than active.
        inactive_priority = updater.get_kernel_priority(inactive_kernel)
        active_priority = updater.get_kernel_priority(active_kernel)
        if inactive_priority < active_priority:
            raise autoupdater.ChromiumOSError(
                'Update failed. The priority of the inactive kernel'
                ' partition is less than that of the active kernel'
                ' partition.')

        update_engine_log = '/var/log/update_engine.log'
        logging.info('Dumping %s', update_engine_log)
        self.run('cat %s' % update_engine_log)

        # Updater has returned successfully; reboot the host.
        self.reboot(timeout=60, wait=True)

        # Touch the lab machine file to leave a marker that distinguishes
        # this image from other test images.
        self.run('touch %s' % self._LAB_MACHINE_FILE)

        # Following the reboot, verify the correct version.
        updater.check_version()

        # Figure out newly active kernel.
        new_active_kernel, _ = updater.get_kernel_state()

        # Ensure that previously inactive kernel is now the active kernel.
        if new_active_kernel != inactive_kernel:
            raise autoupdater.ChromiumOSError(
                'Update failed. New kernel partition is not active after'
                ' boot.')

        host_attributes = site_host_attributes.HostAttributes(self.hostname)
        if host_attributes.has_chromeos_firmware:
            def kernel_marked_successful():
                """Check for tries == 0 and success on the new kernel."""
                return (updater.get_kernel_tries(new_active_kernel) == 0
                        and updater.get_kernel_success(new_active_kernel))

            # Wait until tries == 0 and success, or until timeout.
            utils.poll_for_condition(
                kernel_marked_successful,
                exception=autoupdater.ChromiumOSError(
                    'Update failed. Timed out waiting for system to mark'
                    ' new kernel as successful.'),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)

    # Clean up any old autotest directories which may be lying around.
    stale_paths = global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list)
    for path in stale_paths:
        self.run('rm -rf ' + path)
270
271
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern;
    an error is logged in that case.

    This requires the module-level `models` to have been imported
    successfully; if it is `None`, the lookup raises.

    @returns board from label, or `None`.
    """
    host_model = models.Host.objects.get(hostname=self.hostname)
    # Build a real list (rather than relying on what filter()
    # returns) so that taking its length and indexing it are valid.
    board_labels = [label for label in host_model.labels.all()
                    if label.name.startswith('board:')]
    if len(board_labels) == 1:
        return board_labels[0].name.split(':', 1)[1]
    if not board_labels:
        logging.error('Host %s does not have a board label.',
                      self.hostname)
    else:
        logging.error('Host %s has multiple board labels.',
                      self.hostname)
    return None
294
295
def _servo_repair(self, board):
    """Attempt to repair this host using an attached Servo.

    Re-install the OS on the DUT by 1) installing a test image
    on a USB storage device attached to the Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image.

    @param board Board name of the DUT; used to select the
                 devserver and to build the test image URL.

    @exception AutoservError The DUT did not come up in time,
                             either after booting from USB or
                             after rebooting the installed image.
    """
    # Build the URL of the board's test image on the devserver.
    server = dev_server.ImageServer.devserver_url_for_servo(board)
    image = server + (self._DEFAULT_SERVO_URL_FORMAT %
                      { 'board': board })
    self.servo.install_recovery_image(image)
    if not self.wait_up(timeout=self.USB_BOOT_TIMEOUT):
        raise error.AutoservError('DUT failed to boot from USB'
                                  ' after %d seconds' %
                                  self.USB_BOOT_TIMEOUT)
    self.run('chromeos-install --yes',
             timeout=self._INSTALL_TIMEOUT)
    # NOTE(review): presumably the long press powers the DUT off, the
    # mux setting disconnects the USB key from the DUT, and the short
    # press powers it back on to boot the installed image -- confirm
    # against the servo documentation.
    self.servo.power_long_press()
    self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
    self.servo.power_short_press()
    if not self.wait_up(timeout=self.BOOT_TIMEOUT):
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
322
323
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power cycled, and then given `BOOT_TIMEOUT` seconds
    to come up.  If the host is not up/repaired after the first
    powercycle we utilize auto fallback to the last good install by
    powercycling again after each failed boot, giving up once
    `_MAX_POWER_CYCLE_ATTEMPTS` boots have failed.

    @exception AutoservError The host did not come back online
                             within the allowed number of attempts.
    """
    logging.info('Attempting repair via RPM powercycle.')
    self.power_cycle()
    failures = 0
    while True:
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservError('Powercycled host %s %d times; '
                                      'device did not come back online.' %
                                      (self.hostname, failures))
        self.power_cycle()

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
346
347
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    Repair follows this sequence:
      1. If the DUT passes `self.verify()`, do nothing.
      2. If the DUT has a servo and its board is in the servo
         repair whitelist, re-install the DUT using the servo.
      3. Otherwise, if the DUT can be power-cycled via RPM and its
         board is an RPM recovery board, try to repair by
         power-cycling.
      4. Otherwise, re-raise the original verify failure.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.
    """
    try:
        self.verify()
    except Exception:
        # Only ordinary verify failures trigger a repair;
        # KeyboardInterrupt and SystemExit are not caught here, so
        # they propagate without any repair attempt.
        host_board = self._get_board_from_afe()
        if host_board is None:
            logging.error('host %s has no board; failing repair',
                          self.hostname)
            raise
        if (self.servo and
                host_board in self._SERVO_REPAIR_WHITELIST):
            self._servo_repair(host_board)
        elif (self.has_power() and
              host_board in self._RPM_RECOVERY_BOARDS):
            self._powercycle_to_repair()
        else:
            logging.error('host %s has no servo and no RPM control; '
                          'failing repair', self.hostname)
            raise
        self.verify()
384
385
def close(self):
    """Close the host, then shut down all XMLRPC connections.

    Calls the superclass `close()` first, and then disconnects
    every XMLRPC proxy that was set up with `xmlrpc_connect()`.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
389
390
def cleanup(self):
    """Clean up the host after a test by restarting the UI.

    Removes the "cleanup logs paused" marker file, and then tries
    to restart the UI on the host, waiting for the login prompt
    afterwards.  If that fails with an `AutotestRunError`, falls
    back to the superclass `cleanup()`.
    """
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_wait_for_login_prompt')
    except error.AutotestRunError:
        # logging.warn() is a deprecated alias; use logging.warning().
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700405
406
Simran Basi154f5582012-10-23 16:27:11 -0700407 # TODO (sbasi) crosbug.com/35656
408 # Renamed the sitehost cleanup method so we don't go down this pathway.
409 # def cleanup(self):
def cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Runs the superclass `cleanup()`, and then, for hosts that report
    having power control, turns the power on.  A failure reported by
    the RPM client is logged, not raised.
    """
    super(SiteHost, self).cleanup()
    if not self.has_power():
        return
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700421
422
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds.  This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.  So, unless the caller says otherwise, this supplies a
    Chrome OS specific `reboot_cmd` and turns on `fastsync` before
    handing off to the superclass.

    @param dargs Keyword arguments for the superclass `reboot()`.
                 Caller-supplied values for `reboot_cmd` and
                 `fastsync` are left untouched.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
438
439
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition; the required
        amount comes from the `gb_diskspace_required` setting in
        the SERVER section of the global config (default 20).
     3. update_engine answers a simple status request over DBus.

    """
    super(SiteHost, self).verify_software()
    required_gb = global_config.global_config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=int, default=20)
    self.check_diskspace('/mnt/stateful_partition', required_gb)
    self.run('update_engine_client --status')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700457
458
def xmlrpc_connect(self, command, port, command_name=None):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Any connection previously made for `port` is disconnected
    first.  If starting the server on the host fails, the ssh
    tunnel started for the connection is terminated before the
    exception is passed on to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
                        terminate the XMLRPC server on the host.

    @returns An `xmlrpclib.ServerProxy` for the local end of the
             tunnel to the server.
    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    server_started = False
    try:
        remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
        server_started = True
    finally:
        # The tunnel isn't recorded in the proxy map yet, so nothing
        # else would ever clean it up if we fail here.
        if not server_started and tunnel_proc.poll() is None:
            tunnel_proc.terminate()
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    return xmlrpclib.ServerProxy(rpc_url, allow_none=True)
509
510
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned XMLRPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    if port not in self._xmlrpc_proxy_map:
        return
    remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]

    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    tunnel_running = tunnel_proc.poll() is None
    if not tunnel_running:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    else:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)

    del self._xmlrpc_proxy_map[port]
551
552
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports."""
    # xmlrpc_disconnect() deletes entries from the map, so iterate
    # over a snapshot of the ports rather than the live dictionary.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
557
558
def _ping_is_up(self):
    """Ping the host once, and return whether it responded.

    @returns `True` if the single `ping` attempt reported status 0,
             `False` otherwise.
    """
    ping_status = utils.ping(self.hostname, tries=1, deadline=1)
    return ping_status == 0
562
563
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.

    @returns `True` if the host stopped answering `ping`, `False`
             if it was still answering after the timeout.
    """

    # This function is a slightly faster version of wait_down().
    #
    # In AbstractSSHHost.wait_down(), `ssh` is used to determine
    # whether the host is down.  In some situations (mine, at
    # least), `ssh` can take over a minute to determine that the
    # host is down.  The `ping` command answers the question
    # faster, so we use that here instead.
    #
    # There is no equivalent for wait_up(), because a target that
    # answers to `ping` won't necessarily respond to `ssh`.
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if not self._ping_is_up():
            return True

    # If the timeout is short relative to the run time of
    # _ping_is_up(), we might be prone to false failures for
    # lack of checking frequently enough.  To be safe, we make
    # one last check _after_ the deadline.
    still_up = self._ping_is_up()
    return not still_up
591
592
593 def test_wait_for_sleep(self):
594 """Wait for the client to enter low-power sleep mode.
595
596 The test for "is asleep" can't distinguish a system that is
597 powered off; to confirm that the unit was asleep, it is
598 necessary to force resume, and then call
599 `test_wait_for_resume()`.
600
601 This function is expected to be called from a test as part
602 of a sequence like the following:
603
604 ~~~~~~~~
605 boot_id = host.get_boot_id()
606 # trigger sleep on the host
607 host.test_wait_for_sleep()
608 # trigger resume on the host
609 host.test_wait_for_resume(boot_id)
610 ~~~~~~~~
611
612 @exception TestFail The host did not go to sleep within
613 the allowed time.
614 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -0800615 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700616 raise error.TestFail(
617 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700618 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700619
620
621 def test_wait_for_resume(self, old_boot_id):
622 """Wait for the client to resume from low-power sleep mode.
623
624 The `old_boot_id` parameter should be the value from
625 `get_boot_id()` obtained prior to entering sleep mode. A
626 `TestFail` exception is raised if the boot id changes.
627
628 See @ref test_wait_for_sleep for more on this function's
629 usage.
630
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800631 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700632 target host went to sleep.
633
634 @exception TestFail The host did not respond within the
635 allowed time.
636 @exception TestFail The host responded, but the boot id test
637 indicated a reboot rather than a sleep
638 cycle.
639 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700640 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700641 raise error.TestFail(
642 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700643 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700644 else:
645 new_boot_id = self.get_boot_id()
646 if new_boot_id != old_boot_id:
647 raise error.TestFail(
648 'client rebooted, but sleep was expected'
649 ' (old boot %s, new boot %s)'
650 % (old_boot_id, new_boot_id))
651
652
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The "has shut down" check cannot tell a powered-off system
    from one that is merely asleep.  To confirm that the unit
    really was down, force a boot afterwards and then call
    test_wait_for_boot().

    A test is expected to use this function in a sequence like
    the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    timeout = self.SHUTDOWN_TIMEOUT
    if self.ping_wait_down(timeout=timeout):
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' % timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700678
679
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    Waits up to `REBOOT_TIMEOUT` seconds for the host to respond.
    When `old_boot_id` is supplied it should be the value from
    `get_boot_id()` obtained prior to shutting down; the test
    fails if the host still reports that same boot id.  The boot
    id comparison is skipped when `old_boot_id` is not specified.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_up:
        raise error.TestFail(
            'client failed to reboot after %d seconds' %
            self.REBOOT_TIMEOUT)

    # Without a reference boot id there is nothing to compare.
    if not old_boot_id:
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
            'client is back up, but did not reboot'
            ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700709
710
@staticmethod
def check_for_rpm_support(hostname):
    """Check a hostname against the naming format of RPM powered DUTs.

    @param hostname The hostname to check.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab.  If it does follow the
            format, a regular expression MatchObject is returned instead.
    """
    rpm_hostname_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -0700720
721
def has_power(self):
    """Report whether this host follows the RPM powered naming format.

    The answer is whatever `SiteHost.check_for_rpm_support()` returns
    for this host's hostname.

    @return A regular expression MatchObject (a true value) if this
            host's name follows the defined naming format, or None if
            it does not.
    """
    hostname = self.hostname
    return SiteHost.check_for_rpm_support(hostname)
729
730
def power_off(self):
    """Request the 'OFF' power state for this host from the RPM client."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'OFF')
Simran Basid5e5e272012-09-24 15:23:59 -0700734
735
def power_on(self):
    """Request the 'ON' power state for this host from the RPM client."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'ON')
Simran Basid5e5e272012-09-24 15:23:59 -0700739
740
def power_cycle(self):
    """Request a 'CYCLE' (OFF, then ON) of this host's power via RPM."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700744
745
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id reported by
    crossystem: the text before the first '.', lower-cased, with
    any 'google_' removed.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z, so the leading
    # component serves as the platform id.
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0].lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform_id.replace('google_', '')
759
760
@add_function_to_list(_LABEL_FUNCTIONS)
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from the CHROMEOS_RELEASE_BOARD entry of
    /etc/lsb-release on the host.

    @returns a string of the form 'board:<name>' representing this
             host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    board_name = lsb_release['CHROMEOS_RELEASE_BOARD']
    name_parts = board_name.split('-')
    # Devices in the lab generally have the correct board name but our
    # own development devices have {board_name}-signed-{key_type}, so
    # only the leading component is kept.  Names containing 'x86'
    # (e.g. an 'x86-' prefix) keep their first two components.
    kept_parts = 2 if 'x86' in board_name else 1
    return 'board:%s' % '-'.join(name_parts[:kept_parts])
776
777
@add_function_to_list(_LABEL_FUNCTIONS)
def has_lightsensor(self):
    """Determine the lightsensor label for this host.

    Searches `_LIGHTSENSOR_SEARCH_DIR` on the host (following
    symlinks, at most 4 levels deep) for any path matching one of
    `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop only makes find print a few messages to stderr;
        # the command itself still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
797
798
@add_function_to_list(_LABEL_FUNCTIONS)
def has_bluetooth(self):
    """Determine the bluetooth label for this host.

    Checks on the host whether the directory
    /sys/class/bluetooth/hci0 exists.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code of 1, meaning the directory
        # did not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
814
815
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in `_LABEL_FUNCTIONS` is called with this
    host, in order, and each result that is not false (None or an
    empty string, for instance) is collected.

    @returns a list of the labels produced by the label functions.
    """
    candidates = (make_label(self) for make_label in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]