blob: 4ba30d245beb8bdddfa63f766a4b19f967ef8d6f [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
J. Richard Barnette1d78b012012-05-15 13:56:30 -07005import logging
Simran Basid5e5e272012-09-24 15:23:59 -07006import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07008import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -07009import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070010
J. Richard Barnette45e93de2012-04-11 17:24:15 -070011from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080012from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080015from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette82c35912012-11-20 10:09:10 -080016from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070018from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070020from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080022from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070023
Richard Barnette82c35912012-11-20 10:09:10 -080024# Importing frontend.afe.models requires a full Autotest
25# installation (with the Django modules), not just the source
26# repository. Most developers won't have the full installation, so
27# the imports below will fail for them.
28#
29# The fix is to catch import exceptions, and set `models` to `None`
30# on failure. This has the side effect that
31# SiteHost._get_board_from_afe() will fail: That will manifest as
32# failures during Repair jobs leaving the DUT as "Repair Failed".
33# In practice, you can't test Repair jobs without a full
34# installation, so that kind of failure isn't expected.
try:
    # setup_django_environment is not referenced by name in the code
    # visible here; presumably it is imported for its side effects
    # and must come before the `models` import -- TODO confirm.
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit raised during import are not
    # silently swallowed.
    models = None
40
Simran Basid5e5e272012-09-24 15:23:59 -070041
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line, with options tuned for Chrome OS.

    Overrides the default make_ssh_command.  The `hosts_file`,
    `connect_timeout` and `alive_interval` arguments are accepted but
    not used; the corresponding settings are fixed as listed below.

    Fixed settings:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null; we
        don't care about the keys.  Host keys change with every new
        installation, don't waste memory/space saving them.

      - BatchMode=yes.

      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; causes SSH to ping the connection
        every 180 seconds.  In conjunction with ServerAliveCountMax
        ensures that if the connection dies, Autotest will bail out
        quickly.  Originally tried 60 secs, but saw frequent job
        ABORTS where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user Login name passed with `-l`.
    @param port Port number passed with `-p`.
    @param opts Extra ssh options, inserted verbatim after `-a -x`.

    @returns The ssh command as a single string, without a hostname.
    """
    fixed_settings = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ' '.join(['-o %s' % setting
                             for setting in fixed_settings])
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_flags, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070071
72
def add_function_to_list(functions_list):
    """Return a decorator that records functions in `functions_list`.

    The decorator appends each decorated function to the given list
    and hands the function back unchanged.

    @param functions_list List that collects the decorated functions.
    """
    def add_func(decorated):
        functions_list.append(decorated)
        return decorated
    return add_func
79
80
J. Richard Barnette45e93de2012-04-11 17:24:15 -070081class SiteHost(remote.RemoteHost):
82 """Chromium OS specific subclass of Host."""
83
84 _parser = autoserv_parser.autoserv_parser
85
Richard Barnette0c73ffc2012-11-19 15:21:18 -080086 # Time to wait for new kernel to be marked successful after
87 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -070088 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -070089
Richard Barnette03a0c132012-11-05 12:40:35 -080090 # Timeout values (in seconds) associated with various Chrome OS
91 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070092 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -080093 # In general, a good rule of thumb is that the timeout can be up
94 # to twice the typical measured value on the slowest platform.
95 # The times here have not necessarily been empirically tested to
96 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070097 #
98 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
# RESUME_TIMEOUT: Time to allow for resume after suspend, plus
#   time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700101 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800102 # other things, this must account for the 30 second dev-mode
103 # screen delay and time to start the network,
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700104 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800105 # including the 30 second dev-mode delay and time to start the
106 # network,
107 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700108 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
Richard Barnette03a0c132012-11-05 12:40:35 -0800109 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700110
111 SLEEP_TIMEOUT = 2
112 RESUME_TIMEOUT = 5
113 BOOT_TIMEOUT = 45
114 USB_BOOT_TIMEOUT = 150
115 SHUTDOWN_TIMEOUT = 5
116 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
Richard Barnette03a0c132012-11-05 12:40:35 -0800117 _INSTALL_TIMEOUT = 240
118
119 _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
120 '%(board)s_test_image.bin')
121
122 # TODO(jrbarnette): Servo repair is restricted to x86-alex,
123 # because the existing servo client code won't work on other
124 # boards. http://crosbug.com/36973
125 _SERVO_REPAIR_WHITELIST = [ 'x86-alex' ]
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800126
127
Richard Barnette82c35912012-11-20 10:09:10 -0800128 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
129 'rpm_recovery_boards', type=str).split(',')
130
131 _MAX_POWER_CYCLE_ATTEMPTS = 6
132 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
133 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
134 'host[0-9]+')
135 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
136 'in_illuminance0_raw',
137 'illuminance0_input']
138 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
139 _LABEL_FUNCTIONS = []
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700140
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800141
@staticmethod
def get_servo_arguments(arglist):
    """Pick the servo settings out of `arglist`.

    @param arglist Mapping of argument names to values.

    @returns A new dictionary holding only the `servo_host` and
             `servo_port` entries that are present in `arglist`.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, arglist[key])
                for key in servo_keys if key in arglist)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700149
J. Richard Barnette964fba02012-10-24 17:34:29 -0700150
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    The servo object is chosen as follows.  If
    `servo.Servo.get_lab_servo()` returns a servo for this hostname,
    that servo is used unconditionally.  Otherwise, if the caller
    supplied `servo_args` (settings for `servo_host`, `servo_port`,
    or both), a servo is created from them.  If neither case
    applies, `self.servo` is left as whatever false value
    `get_lab_servo()` returned.

    @param hostname Name of the host; passed on to the superclass.
    @param servo_args Keyword arguments for `servo.Servo()`, or
                      `None`.
    """
    super(SiteHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._xmlrpc_proxy_map = {}
    self.servo = servo.Servo.get_lab_servo(hostname)
    if self.servo or servo_args is None:
        return
    self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700174
175
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False):
    """Update the host to a new image using the autoupdater.

    @param update_url URL of the update to install.  When not given,
                      the `image` option of the autoserv parser is
                      used instead.
    @param force_update Passed through to `updater.run_update()`.
    @param local_devserver Passed through to the
                           `ChromiumOSUpdater` constructor.

    @exception ChromiumOSError No update URL is available, or one of
                               the kernel checks made around the
                               update failed.
    """
    if not update_url:
        update_url = self._parser.options.image
        if not update_url:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    # In case the system is in a bad state, we always reboot the
    # machine before machine_install.
    self.reboot(timeout=60, wait=True)

    # Attempt to update the system.
    updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    if updater.run_update(force_update):
        # Figure out active and inactive kernel.
        active_kernel, inactive_kernel = updater.get_kernel_state()

        # The inactive kernel must not have lower priority than the
        # active one.
        inactive_priority = updater.get_kernel_priority(inactive_kernel)
        active_priority = updater.get_kernel_priority(active_kernel)
        if inactive_priority < active_priority:
            raise autoupdater.ChromiumOSError(
                    'Update failed. The priority of the inactive kernel'
                    ' partition is less than that of the active kernel'
                    ' partition.')

        update_engine_log = '/var/log/update_engine.log'
        logging.info('Dumping %s', update_engine_log)
        self.run('cat %s' % update_engine_log)
        # Updater has returned successfully; reboot the host.
        self.reboot(timeout=60, wait=True)
        # Touch the lab machine file to leave a marker that
        # distinguishes this image from other test images.
        self.run('touch %s' % self._LAB_MACHINE_FILE)

        # Following the reboot, verify the correct version.
        updater.check_version()

        # The previously inactive kernel must now be the active one.
        new_active_kernel, _ = updater.get_kernel_state()
        if new_active_kernel != inactive_kernel:
            raise autoupdater.ChromiumOSError(
                    'Update failed. New kernel partition is not active'
                    ' after boot.')

        host_attributes = site_host_attributes.HostAttributes(
                self.hostname)
        if host_attributes.has_chromeos_firmware:
            def kernel_marked_successful():
                """Check for tries == 0 and the success flag."""
                return (updater.get_kernel_tries(new_active_kernel) == 0
                        and updater.get_kernel_success(new_active_kernel))

            # Wait until tries == 0 and success, or until timeout.
            utils.poll_for_condition(
                    kernel_marked_successful,
                    exception=autoupdater.ChromiumOSError(
                            'Update failed. Timed out waiting for system'
                            ' to mark new kernel as successful.'),
                    timeout=self._KERNEL_UPDATE_TIMEOUT,
                    sleep_interval=5)

    # Clean up any old autotest directories which may be lying around.
    autodir_paths = global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list)
    for path in autodir_paths:
        self.run('rm -rf ' + path)
239
240
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern.

    Relies on the module-level `models` name; if that is `None`
    the lookup below raises.

    @returns board from label, or `None`.
    """
    host_model = models.Host.objects.get(hostname=self.hostname)
    # Build a real list: the result is measured with len() and
    # indexed, which does not work on the iterator that filter()
    # returns on newer Python versions.
    board_labels = [label for label in host_model.labels.all()
                    if label.name.startswith('board:')]
    if len(board_labels) == 1:
        return board_labels[0].name.split(':', 1)[1]
    if not board_labels:
        logging.error('Host %s does not have a board label.',
                      self.hostname)
    else:
        logging.error('Host %s has multiple board labels.',
                      self.hostname)
    return None
263
264
def _servo_repair(self, board):
    """Attempt to repair this host using an attached Servo.

    Re-install the OS on the DUT by 1) installing a test image
    on a USB storage device attached to the Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image.

    @param board Board name, used to select the dev server and to
                 build the image URL.

    @exception AutoservError The DUT did not come up within the
                             allowed time, either from USB or from
                             the freshly installed image.
    """
    devserver = dev_server.ImageServer.devserver_url_for_servo(board)
    image_path = self._DEFAULT_SERVO_URL_FORMAT % {'board': board}
    self.servo.install_recovery_image(devserver + image_path)

    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservError(
                'DUT failed to boot from USB after %d seconds' %
                self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)

    # Servo sequence issued after the install: long power press,
    # switch the USB mux to 'servo_sees_usbkey', short power press.
    self.servo.power_long_press()
    self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
    self.servo.power_short_press()

    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        raise error.AutoservError(
                'DUT failed to reboot installed test image after'
                ' %d seconds' % self.BOOT_TIMEOUT)
290 self.BOOT_TIMEOUT)
291
292
Richard Barnette82c35912012-11-20 10:09:10 -0800293 def _powercycle_to_repair(self):
294 """Utilize the RPM Infrastructure to bring the host back up.
295
296 If the host is not up/repaired after the first powercycle we utilize
297 auto fallback to the last good install by powercycling and rebooting the
298 host 6 times.
299 """
300 logging.info('Attempting repair via RPM powercycle.')
301 failed_cycles = 0
302 self.power_cycle()
303 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
304 failed_cycles += 1
305 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
306 raise error.AutoservError('Powercycled host %s %d times; '
307 'device did not come back online.' %
308 (self.hostname, failed_cycles))
309 self.power_cycle()
310 if failed_cycles == 0:
311 logging.info('Powercycling was successful first time.')
312 else:
313 logging.info('Powercycling was successful after %d failures.',
314 failed_cycles)
315
316
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    Repair follows this sequence:
      1. If the DUT passes `self.verify()`, do nothing.
      2. If the DUT has a servo and its board is in
         `_SERVO_REPAIR_WHITELIST`, re-install via servo.
      3. Otherwise, if the DUT can be power-cycled via RPM and its
         board is in `_RPM_RECOVERY_BOARDS`, try to repair by
         power-cycling.
      4. Otherwise the original verify failure is re-raised.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.
    """
    try:
        self.verify()
    except Exception:
        # Only ordinary failures trigger a repair.  KeyboardInterrupt
        # and SystemExit propagate without touching the DUT.
        host_board = self._get_board_from_afe()
        if host_board is None:
            logging.error('host %s has no board; failing repair',
                          self.hostname)
            raise
        if (self.servo and
                host_board in self._SERVO_REPAIR_WHITELIST):
            self._servo_repair(host_board)
        elif (self.has_power() and
                host_board in self._RPM_RECOVERY_BOARDS):
            self._powercycle_to_repair()
        else:
            logging.error('host %s has no servo and no RPM control; '
                          'failing repair', self.hostname)
            raise
        self.verify()
Richard Barnette82c35912012-11-20 10:09:10 -0800352 self.verify()
353
354
def close(self):
    """Close the host and drop all XMLRPC connections.

    Calls the superclass `close()` first, and then
    `self.xmlrpc_disconnect_all()`.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
358
359
def cleanup(self):
    """Clean up the host by restarting the UI, with reboot fallback.

    Removes `constants.CLEANUP_LOGS_PAUSED_FILE` on the host, then
    runs `restart ui` bracketed by two `cros_ui` helpers run through
    the Autotest client.  If any of those steps raises
    `AutoservRunError`, the superclass `cleanup()` is used instead.
    """
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except error.AutoservRunError:
        # logging.warning replaces the deprecated logging.warn alias.
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700374
375
Simran Basi154f5582012-10-23 16:27:11 -0700376 # TODO (sbasi) crosbug.com/35656
377 # Renamed the sitehost cleanup method so we don't go down this pathway.
378 # def cleanup(self):
def cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Runs the superclass `cleanup()`, and then, for hosts where
    `self.has_power()` is true, turns the power on.  A
    `RemotePowerException` from the power request is logged rather
    than raised.
    """
    super(SiteHost, self).cleanup()
    if not self.has_power():
        return
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the
        # RPM Infrastructure, log an error message rather than fail
        # cleanup.
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700390
391
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds.  This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.

    Defaults are supplied for `reboot_cmd` and `fastsync` when the
    caller does not pass them; all keyword arguments are then
    handed to the superclass `reboot()`.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before
    # reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
407
408
def verify_software(self):
    """Ensure the stateful partition has space for Autotest and updates.

    Similar to what is done by AbstractSSH, except instead of
    checking the Autotest installation path, just check the stateful
    partition.

    Checking the stateful partition is preferable in case it has
    been wiped, resulting in an Autotest installation path which
    doesn't exist and isn't writable.  We still want to pass verify
    in this state since the partition will be recovered with the
    next install.

    The required space is read from the `gb_diskspace_required`
    setting in the `SERVER` config section, defaulting to 20.
    """
    super(SiteHost, self).verify_software()
    required_gb = global_config.global_config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=int, default=20)
    self.check_diskspace('/mnt/stateful_partition', required_gb)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700426
427
def xmlrpc_connect(self, command, port, cleanup=None):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Any connection previously made for `port` is disconnected
    first.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param cleanup If set, a pattern that `xmlrpc_disconnect()`
                   passes to `pkill -f` on the host to terminate
                   the server.

    @returns An `xmlrpclib.ServerProxy` that reaches the server
             through a local ssh tunnel.
    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except Exception:
        # The tunnel is not yet recorded in _xmlrpc_proxy_map, so
        # nothing else would ever shut it down; stop it here
        # before passing the failure on.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
        raise
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (cleanup, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    return xmlrpclib.ServerProxy(rpc_url, allow_none=True)
476
477
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned XMLRPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`.

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`.
    """
    if port not in self._xmlrpc_proxy_map:
        return

    remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]
    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        kill_cmd = "pkill -f '%s'" % remote_name
        self.run(kill_cmd, ignore_status=True)

    tunnel_is_running = tunnel_proc.poll() is None
    if tunnel_is_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
518
519
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports."""
    # Iterate over a snapshot of the ports: xmlrpc_disconnect()
    # deletes entries from the map, which must not happen while the
    # map itself is being iterated.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
523 self.xmlrpc_disconnect(port)
524
525
def _ping_is_up(self):
    """Ping the host once, and return whether it responded.

    @returns True if `utils.ping()` reported 0 for a single try
             with a deadline of 1, False otherwise.
    """
    ping_result = utils.ping(self.hostname, tries=1, deadline=1)
    return ping_result == 0
529
530
531 def _ping_wait_down(self, timeout):
532 """Wait until the host no longer responds to `ping`.
533
534 @param timeout Minimum time to allow before declaring the
535 host to be non-responsive.
536 """
537
538 # This function is a slightly faster version of wait_down().
539 #
540 # In AbstractSSHHost.wait_down(), `ssh` is used to determine
541 # whether the host is down. In some situations (mine, at
542 # least), `ssh` can take over a minute to determine that the
543 # host is down. The `ping` command answers the question
544 # faster, so we use that here instead.
545 #
546 # There is no equivalent for wait_up(), because a target that
547 # answers to `ping` won't necessarily respond to `ssh`.
548 end_time = time.time() + timeout
549 while time.time() <= end_time:
550 if not self._ping_is_up():
551 return True
552
553 # If the timeout is short relative to the run time of
554 # _ping_is_up(), we might be prone to false failures for
555 # lack of checking frequently enough. To be safe, we make
556 # one last check _after_ the deadline.
557 return not self._ping_is_up()
558
559
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_down = self._ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700586
587
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode.  A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param[in] old_boot_id A boot id value obtained before the
                           target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        return
    raise error.TestFail(
            'client rebooted, but sleep was expected'
            ' (old boot %s, new boot %s)'
            % (old_boot_id, new_boot_id))
618
619
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The test for "has shut down" can't distinguish a system that
    is merely asleep; to confirm that the unit was down, it is
    necessary to force boot, and then call test_wait_for_boot().

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    went_down = self._ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700645
646
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to shutting down.  A
    `TestFail` exception is raised if the boot id does not
    change.  The boot id test is omitted if `old_boot_id` is not
    specified.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param[in] old_boot_id A boot id value obtained before the
                           shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # The boot id is only fetched when there is one to compare with.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700676
677
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    The hostname is matched, from its start, against
    `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname Hostname to check.

    @return None if this host does not follow the defined naming
            format for RPM powered DUT's in the lab.  If it does
            follow the format, a regular expression MatchObject is
            returned instead.
    """
    rpm_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -0700687
688
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    The decision is made purely from `self.hostname`, by delegating
    to `SiteHost.check_for_rpm_support()`.

    @return True if this host's name follows the naming format
            defined for RPM powered DUTs, False otherwise.
    """
    # check_for_rpm_support() yields a MatchObject or None; coerce it so
    # that callers get the boolean this method has always documented.
    return bool(SiteHost.check_for_rpm_support(self.hostname))
696
697
def power_off(self):
    """Request the 'OFF' power state for this host via `rpm_client`."""
    requested_state = 'OFF'
    rpm_client.set_power(self.hostname, requested_state)
Simran Basid5e5e272012-09-24 15:23:59 -0700700
701
def power_on(self):
    """Request the 'ON' power state for this host via `rpm_client`."""
    requested_state = 'ON'
    rpm_client.set_power(self.hostname, requested_state)
Simran Basid5e5e272012-09-24 15:23:59 -0700704
705
def power_cycle(self):
    """Request the 'CYCLE' power state for this host via `rpm_client`."""
    requested_state = 'CYCLE'
    rpm_client.set_power(self.hostname, requested_state)
Simran Basic6f1f7a2012-10-16 10:47:46 -0700708
709
def get_platform(self):
    """Work out the platform label for this host from its firmware id.

    The fwid reported by crossystem generally has the form
    {platform}.{firmware version}, for example `Alex.X.YYY.Z` or
    `Google_Alex.X.YYY.Z`.  Everything before the first '.' is taken
    as the platform id, lower-cased, and has 'google_' removed.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    firmware_id = crossystem.fwid()
    # Keep only the part in front of the first '.'.
    platform_id = firmware_id.partition('.')[0]
    # Newer platforms carry a 'Google_' prefix that older ones lack;
    # drop it so both kinds yield the same label.
    return platform_id.lower().replace('google_', '')
723
724
@add_function_to_list(_LABEL_FUNCTIONS)
def get_board(self):
    """Work out the board label for this host.

    The board name is read from the CHROMEOS_RELEASE_BOARD entry of
    /etc/lsb-release on the host.

    @returns a string of the form 'board:<name>' representing this
             host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    raw_board = lsb_release['CHROMEOS_RELEASE_BOARD']
    # Lab devices generally report the plain board name, whereas our own
    # development devices report {board_name}-signed-{key_type}.  Only
    # the leading dash-separated field is wanted, except for names
    # containing 'x86' (e.g. an 'x86-' prefix), where the first two
    # fields together make up the board name.
    fields_to_keep = 2 if 'x86' in raw_board else 1
    board_fields = raw_board.split('-')[:fields_to_keep]
    return 'board:%s' % '-'.join(board_fields)
740
741
@add_function_to_list(_LABEL_FUNCTIONS)
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Runs a `find` under `_LIGHTSENSOR_SEARCH_DIR` on the host and
    filters the output for any of the names in `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    name_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (search_dir,
                                                         name_pattern)
    try:
        # The search follows symlinks, so a symlink loop may make find
        # print a few messages on stderr even though the command still
        # works; stderr_tee=None keeps those out of the way.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exits with status 1 when none of the candidate
        # lightsensor files were found.
        return None
    return 'lightsensor'
761
762
@add_function_to_list(_LABEL_FUNCTIONS)
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # `test` exited non-zero: the directory does not exist.
        return None
    # `test` exited with status 0: the directory is present.
    return 'bluetooth'
778
779
def get_labels(self):
    """Collect every automatic label that applies to this host.

    This is the main way to retrieve all the automatic labels for a host:
    each function in `_LABEL_FUNCTIONS` is called once with this host,
    in order, and every truthy result is kept.

    @returns a list of the labels produced by the label functions;
             falsy results (such as None) are left out.
    """
    candidate_labels = (detect(self) for detect in self._LABEL_FUNCTIONS)
    return [found for found in candidate_labels if found]