blob: ffee2ee1b355530ac71780906ea8edc3ec0c14e3 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
J. Richard Barnette1d78b012012-05-15 13:56:30 -07005import logging
Simran Basid5e5e272012-09-24 15:23:59 -07006import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07008import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -07009import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070010
J. Richard Barnette45e93de2012-04-11 17:24:15 -070011from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080012from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080015from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette82c35912012-11-20 10:09:10 -080016from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070018from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070020from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080022from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070023
# Importing frontend.afe.models requires a full Autotest
# installation (with the Django modules), not just the source
# repository.  Most developers won't have the full installation, so
# the imports below will fail for them.
#
# The fix is to catch exceptions raised by the imports, and set
# `models` to `None` on failure.  This has the side effect that
# SiteHost._get_board_from_afe() will fail:  That will manifest as
# failures during Repair jobs leaving the DUT as "Repair Failed".
# In practice, you can't test Repair jobs without a full
# installation, so that kind of failure isn't expected.
#
# `Exception` is caught (rather than using a bare `except:`) so that
# KeyboardInterrupt and SystemExit still propagate instead of being
# silently turned into `models = None`.
try:
    # NOTE(review): setup_django_environment is never referenced by
    # name in this file; presumably it is imported for its side
    # effects before `models` is loaded - confirm.
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:
    models = None
40
Simran Basid5e5e272012-09-24 15:23:59 -070041
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to reach Chrome OS hosts.

    Overrides the default make_ssh_command with a fixed set of options
    tuned for Chrome OS.  The `hosts_file`, `connect_timeout` and
    `alive_interval` arguments are accepted but ignored; the hard-coded
    values below are always used.

    Tuning choices:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping the connection
        every 180 seconds.  In conjunction with ServerAliveCountMax this
        ensures that if the connection dies, Autotest will bail out
        quickly.  60 seconds was tried originally, but produced frequent
        job ABORTS where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
        Host keys change with every new installation, don't waste
        memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user Login name passed to `ssh -l`.
    @param port Port number passed to `ssh -p`.
    @param opts Extra options inserted verbatim after `-a -x`.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command as a single string, without a host name.
    """
    ssh_template = ('/usr/bin/ssh -a -x %s'
                    ' -o StrictHostKeyChecking=no'
                    ' -o UserKnownHostsFile=/dev/null'
                    ' -o BatchMode=yes'
                    ' -o ConnectTimeout=30'
                    ' -o ServerAliveInterval=180'
                    ' -o ServerAliveCountMax=3'
                    ' -o ConnectionAttempts=4'
                    ' -o Protocol=2'
                    ' -l %s -p %d')
    substitutions = (opts, user, port)
    return ssh_template % substitutions
J. Richard Barnette45e93de2012-04-11 17:24:15 -070071
72
def add_function_to_list(functions_list):
    """Return a decorator that registers functions in `functions_list`.

    The decorator appends the decorated function to `functions_list`
    and hands the function back unchanged, so decorating only has the
    side effect of grouping the function with the others in the list.

    @param functions_list List that collects the decorated functions.

    @returns A decorator taking and returning a single function.
    """
    def register(func):
        functions_list.append(func)
        return func
    return register
79
80
class SiteHost(remote.RemoteHost):
    """Chromium OS specific subclass of Host."""

    # Parser object; machine_install() reads `options.image` from it
    # when no update URL is passed in.
    _parser = autoserv_parser.autoserv_parser

    # Time to wait for new kernel to be marked successful after
    # auto update.
    _KERNEL_UPDATE_TIMEOUT = 120

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT:  Time to allow for resume after suspend, plus
    #   time to restart the network.
    # BOOT_TIMEOUT:  Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT:  Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # SHUTDOWN_TIMEOUT:  Time to allow for shut down.
    # REBOOT_TIMEOUT:  Combination of shutdown and reboot times.
    # _INSTALL_TIMEOUT:  Time to allow for chromeos-install.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 5
    BOOT_TIMEOUT = 45
    USB_BOOT_TIMEOUT = 150
    SHUTDOWN_TIMEOUT = 5
    REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
    _INSTALL_TIMEOUT = 240

    # Path, relative to the devserver URL, of the test image that
    # _servo_repair() installs; `%(board)s` is filled in with the
    # board name.
    _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
                                 '%(board)s_test_image.bin')

    # TODO(jrbarnette):  Servo repair is restricted to x86-alex,
    # because the existing servo client code won't work on other
    # boards.  http://crosbug.com/36973
    _SERVO_REPAIR_WHITELIST = [ 'x86-alex' ]


    # Boards for which repair_full() may fall back to an RPM power
    # cycle.  Read once, at class definition time, from the
    # comma-separated `rpm_recovery_boards` setting in the `CROS`
    # section of the global config.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    # Number of failed boots after which _powercycle_to_repair()
    # gives up.
    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched by machine_install() after an update, to
    # distinguish this image from other test images.
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # Host name pattern used by check_for_rpm_support().
    _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                           'host[0-9]+')
    # File names, and the directory searched for them, used by
    # has_lightsensor().
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # Label functions registered with @add_function_to_list; each is
    # called by get_labels().
    _LABEL_FUNCTIONS = []
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700140
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800141
J. Richard Barnette964fba02012-10-24 17:34:29 -0700142 @staticmethod
143 def get_servo_arguments(arglist):
144 servo_args = {}
145 for arg in ('servo_host', 'servo_port'):
146 if arg in arglist:
147 servo_args[arg] = arglist[arg]
148 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700149
J. Richard Barnette964fba02012-10-24 17:34:29 -0700150
151 def _initialize(self, hostname, servo_args=None, *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700152 """Initialize superclasses, and |self.servo|.
153
154 For creating the host servo object, there are three
155 possibilities: First, if the host is a lab system known to
156 have a servo board, we connect to that servo unconditionally.
157 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700158 servo features for testing, it will pass settings for
159 `servo_host`, `servo_port`, or both. If neither of these
160 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700161
162 """
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700163 super(SiteHost, self)._initialize(hostname=hostname,
164 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700165 # self.env is a dictionary of environment variable settings
166 # to be exported for commands run on the host.
167 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
168 # errors that might happen.
169 self.env['LIBC_FATAL_STDERR_'] = '1'
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700170 self._xmlrpc_proxy_map = {}
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700171 self.servo = servo.Servo.get_lab_servo(hostname)
J. Richard Barnettead7da482012-10-30 16:46:52 -0700172 if not self.servo and servo_args is not None:
J. Richard Barnette964fba02012-10-24 17:34:29 -0700173 self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700174
175
Chris Sosaa3ac2152012-05-23 22:23:13 -0700176 def machine_install(self, update_url=None, force_update=False,
177 local_devserver=False):
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700178 if not update_url and self._parser.options.image:
179 update_url = self._parser.options.image
180 elif not update_url:
181 raise autoupdater.ChromiumOSError(
182 'Update failed. No update URL provided.')
183
184 # Attempt to update the system.
Chris Sosaa3ac2152012-05-23 22:23:13 -0700185 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
186 local_devserver=local_devserver)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700187 if updater.run_update(force_update):
188 # Figure out active and inactive kernel.
189 active_kernel, inactive_kernel = updater.get_kernel_state()
190
191 # Ensure inactive kernel has higher priority than active.
192 if (updater.get_kernel_priority(inactive_kernel)
193 < updater.get_kernel_priority(active_kernel)):
194 raise autoupdater.ChromiumOSError(
195 'Update failed. The priority of the inactive kernel'
196 ' partition is less than that of the active kernel'
197 ' partition.')
198
Scott Zawalski21902002012-09-19 17:57:00 -0400199 update_engine_log = '/var/log/update_engine.log'
200 logging.info('Dumping %s', update_engine_log)
201 self.run('cat %s' % update_engine_log)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800202 # Updater has returned successfully; reboot the host.
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700203 self.reboot(timeout=60, wait=True)
Chris Sosae146ed82012-09-19 17:58:36 -0700204 # Touch the lab machine file to leave a marker that distinguishes
205 # this image from other test images.
Richard Barnette82c35912012-11-20 10:09:10 -0800206 self.run('touch %s' % self._LAB_MACHINE_FILE)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700207
208 # Following the reboot, verify the correct version.
209 updater.check_version()
210
211 # Figure out newly active kernel.
212 new_active_kernel, _ = updater.get_kernel_state()
213
214 # Ensure that previously inactive kernel is now the active kernel.
215 if new_active_kernel != inactive_kernel:
216 raise autoupdater.ChromiumOSError(
217 'Update failed. New kernel partition is not active after'
218 ' boot.')
219
220 host_attributes = site_host_attributes.HostAttributes(self.hostname)
221 if host_attributes.has_chromeos_firmware:
222 # Wait until tries == 0 and success, or until timeout.
223 utils.poll_for_condition(
224 lambda: (updater.get_kernel_tries(new_active_kernel) == 0
225 and updater.get_kernel_success(new_active_kernel)),
226 exception=autoupdater.ChromiumOSError(
227 'Update failed. Timed out waiting for system to mark'
228 ' new kernel as successful.'),
229 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
230
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700231 # Clean up any old autotest directories which may be lying around.
232 for path in global_config.global_config.get_config_value(
233 'AUTOSERV', 'client_autodir_paths', type=list):
234 self.run('rm -rf ' + path)
235
236
Richard Barnette82c35912012-11-20 10:09:10 -0800237 def _get_board_from_afe(self):
238 """Retrieve this host's board from its labels in the AFE.
239
240 Looks for a host label of the form "board:<board>", and
241 returns the "<board>" part of the label. `None` is returned
242 if there is not a single, unique label matching the pattern.
243
244 @returns board from label, or `None`.
245 """
246 host_model = models.Host.objects.get(hostname=self.hostname)
247 board_labels = filter(lambda l: l.name.startswith('board:'),
248 host_model.labels.all())
249 board_name = None
250 if len(board_labels) == 1:
251 board_name = board_labels[0].name.split(':', 1)[1]
252 elif len(board_labels) == 0:
253 logging.error('Host %s does not have a board label.',
254 self.hostname)
255 else:
256 logging.error('Host %s has multiple board labels.',
257 self.hostname)
258 return board_name
259
260
Richard Barnette03a0c132012-11-05 12:40:35 -0800261 def _servo_repair(self, board):
262 """Attempt to repair this host using an attached Servo.
263
264 Re-install the OS on the DUT by 1) installing a test image
265 on a USB storage device attached to the Servo board,
266 2) booting that image in recovery mode, and then
267 3) installing the image.
268
269 """
270 server = dev_server.ImageServer.devserver_url_for_servo(board)
271 image = server + (self._DEFAULT_SERVO_URL_FORMAT %
272 { 'board': board })
273 self.servo.install_recovery_image(image)
274 if not self.wait_up(timeout=self.USB_BOOT_TIMEOUT):
275 raise error.AutoservError('DUT failed to boot from USB'
276 ' after %d seconds' %
277 self.USB_BOOT_TIMEOUT)
278 self.run('chromeos-install --yes',
279 timeout=self._INSTALL_TIMEOUT)
280 self.servo.power_long_press()
281 self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
282 self.servo.power_short_press()
283 if not self.wait_up(timeout=self.BOOT_TIMEOUT):
284 raise error.AutoservError('DUT failed to reboot installed '
285 'test image after %d seconds' %
286 self.BOOT_TIMEOUT)
287
288
Richard Barnette82c35912012-11-20 10:09:10 -0800289 def _powercycle_to_repair(self):
290 """Utilize the RPM Infrastructure to bring the host back up.
291
292 If the host is not up/repaired after the first powercycle we utilize
293 auto fallback to the last good install by powercycling and rebooting the
294 host 6 times.
295 """
296 logging.info('Attempting repair via RPM powercycle.')
297 failed_cycles = 0
298 self.power_cycle()
299 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
300 failed_cycles += 1
301 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
302 raise error.AutoservError('Powercycled host %s %d times; '
303 'device did not come back online.' %
304 (self.hostname, failed_cycles))
305 self.power_cycle()
306 if failed_cycles == 0:
307 logging.info('Powercycling was successful first time.')
308 else:
309 logging.info('Powercycling was successful after %d failures.',
310 failed_cycles)
311
312
313 def repair_full(self):
314 """Repair a host for repair level NO_PROTECTION.
315
316 This overrides the base class function for repair; it does
317 not call back to the parent class, but instead offers a
318 simplified implementation based on the capabilities in the
319 Chrome OS test lab.
320
321 Repair follows this sequence:
322 1. If the DUT passes `self.verify()`, do nothing.
323 2. If the DUT can be power-cycled via RPM, try to repair
324 by power-cycling.
325
326 As with the parent method, the last operation performed on
327 the DUT must be to call `self.verify()`; if that call fails,
328 the exception it raises is passed back to the caller.
329 """
330 try:
331 self.verify()
332 except:
333 host_board = self._get_board_from_afe()
Richard Barnette03a0c132012-11-05 12:40:35 -0800334 if host_board is None:
335 logging.error('host %s has no board; failing repair',
336 self.hostname)
Richard Barnette82c35912012-11-20 10:09:10 -0800337 raise
Richard Barnette03a0c132012-11-05 12:40:35 -0800338 if (self.servo and
339 host_board in self._SERVO_REPAIR_WHITELIST):
340 self._servo_repair(host_board)
341 elif (self.has_power() and
342 host_board in self._RPM_RECOVERY_BOARDS):
343 self._powercycle_to_repair()
344 else:
345 logging.error('host %s has no servo and no RPM control; '
346 'failing repair', self.hostname)
347 raise
Richard Barnette82c35912012-11-20 10:09:10 -0800348 self.verify()
349
350
    def close(self):
        """Close the host, then disconnect all XMLRPC proxy ports."""
        super(SiteHost, self).close()
        self.xmlrpc_disconnect_all()
354
355
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700356 def cleanup(self):
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700357 client_at = autotest.Autotest(self)
Richard Barnette82c35912012-11-20 10:09:10 -0800358 self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
Scott Zawalskiddbc31e2012-11-15 11:29:01 -0500359 try:
360 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
361 '_clear_login_prompt_state')
362 self.run('restart ui')
363 client_at.run_static_method('autotest_lib.client.cros.cros_ui',
364 '_wait_for_login_prompt')
365 except error.AutoservRunError:
366 logging.warn('Unable to restart ui, rebooting device.')
367 # Since restarting the UI fails fall back to normal Autotest
368 # cleanup routines, i.e. reboot the machine.
369 super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700370
371
Simran Basi154f5582012-10-23 16:27:11 -0700372 # TODO (sbasi) crosbug.com/35656
373 # Renamed the sitehost cleanup method so we don't go down this pathway.
374 # def cleanup(self):
375 def cleanup_poweron(self):
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700376 """Special cleanup method to make sure hosts always get power back."""
Chris Sosa9479fcd2012-10-09 13:44:22 -0700377 super(SiteHost, self).cleanup()
Simran Basid5e5e272012-09-24 15:23:59 -0700378 if self.has_power():
Simran Basifd23fb22012-10-22 17:56:22 -0700379 try:
380 self.power_on()
381 except RemotePowerException:
382 # If cleanup has completed but there was an issue with the RPM
383 # Infrastructure, log an error message rather than fail cleanup
384 logging.error('Failed to turn Power On for this host after '
385 'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700386
387
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700388 def reboot(self, **dargs):
389 """
390 This function reboots the site host. The more generic
391 RemoteHost.reboot() performs sync and sleeps for 5
392 seconds. This is not necessary for Chrome OS devices as the
393 sync should be finished in a short time during the reboot
394 command.
395 """
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800396 if 'reboot_cmd' not in dargs:
397 dargs['reboot_cmd'] = ('((reboot & sleep 10; reboot -f &)'
398 ' </dev/null >/dev/null 2>&1 &)')
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700399 # Enable fastsync to avoid running extra sync commands before reboot.
Tom Wai-Hong Tamf5cd1d42012-08-13 12:04:08 +0800400 if 'fastsync' not in dargs:
401 dargs['fastsync'] = True
Yu-Ju Honga2be94a2012-07-31 09:48:52 -0700402 super(SiteHost, self).reboot(**dargs)
403
404
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700405 def verify_software(self):
406 """Ensure the stateful partition has space for Autotest and updates.
407
408 Similar to what is done by AbstractSSH, except instead of checking the
409 Autotest installation path, just check the stateful partition.
410
411 Checking the stateful partition is preferable in case it has been wiped,
412 resulting in an Autotest installation path which doesn't exist and isn't
413 writable. We still want to pass verify in this state since the partition
414 will be recovered with the next install.
415 """
416 super(SiteHost, self).verify_software()
417 self.check_diskspace(
418 '/mnt/stateful_partition',
419 global_config.global_config.get_config_value(
420 'SERVER', 'gb_diskspace_required', type=int,
421 default=20))
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700422
423
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700424 def xmlrpc_connect(self, command, port, cleanup=None):
425 """Connect to an XMLRPC server on the host.
426
427 The `command` argument should be a simple shell command that
428 starts an XMLRPC server on the given `port`. The command
429 must not daemonize, and must terminate cleanly on SIGTERM.
430 The command is started in the background on the host, and a
431 local XMLRPC client for the server is created and returned
432 to the caller.
433
434 Note that the process of creating an XMLRPC client makes no
435 attempt to connect to the remote server; the caller is
436 responsible for determining whether the server is running
437 correctly, and is ready to serve requests.
438
439 @param command Shell command to start the server.
440 @param port Port number on which the server is expected to
441 be serving.
442 """
443 self.xmlrpc_disconnect(port)
444
445 # Chrome OS on the target closes down most external ports
446 # for security. We could open the port, but doing that
447 # would conflict with security tests that check that only
448 # expected ports are open. So, to get to the port on the
449 # target we use an ssh tunnel.
450 local_port = utils.get_unused_port()
451 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
452 ssh_cmd = make_ssh_command(opts=tunnel_options)
453 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
454 logging.debug('Full tunnel command: %s', tunnel_cmd)
455 tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
456 logging.debug('Started XMLRPC tunnel, local = %d'
457 ' remote = %d, pid = %d',
458 local_port, port, tunnel_proc.pid)
459
460 # Start the server on the host. Redirection in the command
461 # below is necessary, because 'ssh' won't terminate until
462 # background child processes close stdin, stdout, and
463 # stderr.
464 remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
465 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
466 logging.debug('Started XMLRPC server on host %s, pid = %s',
467 self.hostname, remote_pid)
468
469 self._xmlrpc_proxy_map[port] = (cleanup, tunnel_proc)
470 rpc_url = 'http://localhost:%d' % local_port
471 return xmlrpclib.ServerProxy(rpc_url, allow_none=True)
472
473
474 def xmlrpc_disconnect(self, port):
475 """Disconnect from an XMLRPC server on the host.
476
477 Terminates the remote XMLRPC server previously started for
478 the given `port`. Also closes the local ssh tunnel created
479 for the connection to the host. This function does not
480 directly alter the state of a previously returned XMLRPC
481 client object; however disconnection will cause all
482 subsequent calls to methods on the object to fail.
483
484 This function does nothing if requested to disconnect a port
485 that was not previously connected via `self.xmlrpc_connect()`
486
487 @param port Port number passed to a previous call to
488 `xmlrpc_connect()`
489 """
490 if port not in self._xmlrpc_proxy_map:
491 return
492 entry = self._xmlrpc_proxy_map[port]
493 remote_name = entry[0]
494 tunnel_proc = entry[1]
495 if remote_name:
496 # We use 'pkill' to find our target process rather than
497 # a PID, because the host may have rebooted since
498 # connecting, and we don't want to kill an innocent
499 # process with the same PID.
500 #
501 # 'pkill' helpfully exits with status 1 if no target
502 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -0700503 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700504 # status.
505 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
506
507 if tunnel_proc.poll() is None:
508 tunnel_proc.terminate()
509 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
510 else:
511 logging.debug('Tunnel pid %d terminated early, status %d',
512 tunnel_proc.pid, tunnel_proc.returncode)
513 del self._xmlrpc_proxy_map[port]
514
515
516 def xmlrpc_disconnect_all(self):
517 """Disconnect all known XMLRPC proxy ports."""
518 for port in self._xmlrpc_proxy_map.keys():
519 self.xmlrpc_disconnect(port)
520
521
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700522 def _ping_is_up(self):
523 """Ping the host once, and return whether it responded."""
524 return utils.ping(self.hostname, tries=1, deadline=1) == 0
525
526
527 def _ping_wait_down(self, timeout):
528 """Wait until the host no longer responds to `ping`.
529
530 @param timeout Minimum time to allow before declaring the
531 host to be non-responsive.
532 """
533
534 # This function is a slightly faster version of wait_down().
535 #
536 # In AbstractSSHHost.wait_down(), `ssh` is used to determine
537 # whether the host is down. In some situations (mine, at
538 # least), `ssh` can take over a minute to determine that the
539 # host is down. The `ping` command answers the question
540 # faster, so we use that here instead.
541 #
542 # There is no equivalent for wait_up(), because a target that
543 # answers to `ping` won't necessarily respond to `ssh`.
544 end_time = time.time() + timeout
545 while time.time() <= end_time:
546 if not self._ping_is_up():
547 return True
548
549 # If the timeout is short relative to the run time of
550 # _ping_is_up(), we might be prone to false failures for
551 # lack of checking frequently enough. To be safe, we make
552 # one last check _after_ the deadline.
553 return not self._ping_is_up()
554
555
556 def test_wait_for_sleep(self):
557 """Wait for the client to enter low-power sleep mode.
558
559 The test for "is asleep" can't distinguish a system that is
560 powered off; to confirm that the unit was asleep, it is
561 necessary to force resume, and then call
562 `test_wait_for_resume()`.
563
564 This function is expected to be called from a test as part
565 of a sequence like the following:
566
567 ~~~~~~~~
568 boot_id = host.get_boot_id()
569 # trigger sleep on the host
570 host.test_wait_for_sleep()
571 # trigger resume on the host
572 host.test_wait_for_resume(boot_id)
573 ~~~~~~~~
574
575 @exception TestFail The host did not go to sleep within
576 the allowed time.
577 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700578 if not self._ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700579 raise error.TestFail(
580 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700581 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700582
583
584 def test_wait_for_resume(self, old_boot_id):
585 """Wait for the client to resume from low-power sleep mode.
586
587 The `old_boot_id` parameter should be the value from
588 `get_boot_id()` obtained prior to entering sleep mode. A
589 `TestFail` exception is raised if the boot id changes.
590
591 See @ref test_wait_for_sleep for more on this function's
592 usage.
593
594 @param[in] old_boot_id A boot id value obtained before the
595 target host went to sleep.
596
597 @exception TestFail The host did not respond within the
598 allowed time.
599 @exception TestFail The host responded, but the boot id test
600 indicated a reboot rather than a sleep
601 cycle.
602 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700603 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700604 raise error.TestFail(
605 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700606 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700607 else:
608 new_boot_id = self.get_boot_id()
609 if new_boot_id != old_boot_id:
610 raise error.TestFail(
611 'client rebooted, but sleep was expected'
612 ' (old boot %s, new boot %s)'
613 % (old_boot_id, new_boot_id))
614
615
616 def test_wait_for_shutdown(self):
617 """Wait for the client to shut down.
618
619 The test for "has shut down" can't distinguish a system that
620 is merely asleep; to confirm that the unit was down, it is
621 necessary to force boot, and then call test_wait_for_boot().
622
623 This function is expected to be called from a test as part
624 of a sequence like the following:
625
626 ~~~~~~~~
627 boot_id = host.get_boot_id()
628 # trigger shutdown on the host
629 host.test_wait_for_shutdown()
630 # trigger boot on the host
631 host.test_wait_for_boot(boot_id)
632 ~~~~~~~~
633
634 @exception TestFail The host did not shut down within the
635 allowed time.
636 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700637 if not self._ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700638 raise error.TestFail(
639 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700640 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700641
642
643 def test_wait_for_boot(self, old_boot_id=None):
644 """Wait for the client to boot from cold power.
645
646 The `old_boot_id` parameter should be the value from
647 `get_boot_id()` obtained prior to shutting down. A
648 `TestFail` exception is raised if the boot id does not
649 change. The boot id test is omitted if `old_boot_id` is not
650 specified.
651
652 See @ref test_wait_for_shutdown for more on this function's
653 usage.
654
655 @param[in] old_boot_id A boot id value obtained before the
656 shut down.
657
658 @exception TestFail The host did not respond within the
659 allowed time.
660 @exception TestFail The host responded, but the boot id test
661 indicated that there was no reboot.
662 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700663 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700664 raise error.TestFail(
665 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700666 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700667 elif old_boot_id:
668 if self.get_boot_id() == old_boot_id:
669 raise error.TestFail(
670 'client is back up, but did not reboot'
671 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700672
673
674 @staticmethod
675 def check_for_rpm_support(hostname):
676 """For a given hostname, return whether or not it is powered by an RPM.
677
678 @return None if this host does not follows the defined naming format
679 for RPM powered DUT's in the lab. If it does follow the format,
680 it returns a regular expression MatchObject instead.
681 """
Richard Barnette82c35912012-11-20 10:09:10 -0800682 return re.match(SiteHost._RPM_HOSTNAME_REGEX, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -0700683
684
685 def has_power(self):
686 """For this host, return whether or not it is powered by an RPM.
687
688 @return True if this host is in the CROS lab and follows the defined
689 naming format.
690 """
691 return SiteHost.check_for_rpm_support(self.hostname)
692
693
    def power_off(self):
        """Ask the RPM client to set this host's power state to 'OFF'."""
        rpm_client.set_power(self.hostname, 'OFF')
Simran Basid5e5e272012-09-24 15:23:59 -0700696
697
    def power_on(self):
        """Ask the RPM client to set this host's power state to 'ON'."""
        rpm_client.set_power(self.hostname, 'ON')
Simran Basid5e5e272012-09-24 15:23:59 -0700700
701
    def power_cycle(self):
        """Ask the RPM client to set this host's power state to 'CYCLE'."""
        rpm_client.set_power(self.hostname, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700704
705
706 def get_platform(self):
707 """Determine the correct platform label for this host.
708
709 @returns a string representing this host's platform.
710 """
711 crossystem = utils.Crossystem(self)
712 crossystem.init()
713 # Extract fwid value and use the leading part as the platform id.
714 # fwid generally follow the format of {platform}.{firmware version}
715 # Example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
716 platform = crossystem.fwid().split('.')[0].lower()
717 # Newer platforms start with 'Google_' while the older ones do not.
718 return platform.replace('google_', '')
719
720
Richard Barnette82c35912012-11-20 10:09:10 -0800721 @add_function_to_list(_LABEL_FUNCTIONS)
Simran Basic6f1f7a2012-10-16 10:47:46 -0700722 def get_board(self):
723 """Determine the correct board label for this host.
724
725 @returns a string representing this host's board.
726 """
727 release_info = utils.parse_cmd_output('cat /etc/lsb-release',
728 run_method=self.run)
729 board = release_info['CHROMEOS_RELEASE_BOARD']
730 # Devices in the lab generally have the correct board name but our own
731 # development devices have {board_name}-signed-{key_type}. The board
732 # name may also begin with 'x86-' which we need to keep.
733 if 'x86' not in board:
734 return 'board:%s' % board.split('-')[0]
735 return 'board:%s' % '-'.join(board.split('-')[0:2])
736
737
Richard Barnette82c35912012-11-20 10:09:10 -0800738 @add_function_to_list(_LABEL_FUNCTIONS)
Simran Basic6f1f7a2012-10-16 10:47:46 -0700739 def has_lightsensor(self):
740 """Determine the correct board label for this host.
741
742 @returns the string 'lightsensor' if this host has a lightsensor or
743 None if it does not.
744 """
745 search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
Richard Barnette82c35912012-11-20 10:09:10 -0800746 self._LIGHTSENSOR_SEARCH_DIR, '|'.join(self._LIGHTSENSOR_FILES))
Simran Basic6f1f7a2012-10-16 10:47:46 -0700747 try:
748 # Run the search cmd following the symlinks. Stderr_tee is set to
749 # None as there can be a symlink loop, but the command will still
750 # execute correctly with a few messages printed to stderr.
751 self.run(search_cmd, stdout_tee=None, stderr_tee=None)
752 return 'lightsensor'
753 except error.AutoservRunError:
754 # egrep exited with a return code of 1 meaning none of the possible
755 # lightsensor files existed.
756 return None
757
758
Richard Barnette82c35912012-11-20 10:09:10 -0800759 @add_function_to_list(_LABEL_FUNCTIONS)
Simran Basic6f1f7a2012-10-16 10:47:46 -0700760 def has_bluetooth(self):
761 """Determine the correct board label for this host.
762
763 @returns the string 'bluetooth' if this host has bluetooth or
764 None if it does not.
765 """
766 try:
767 self.run('test -d /sys/class/bluetooth/hci0')
768 # test exited with a return code of 0.
769 return 'bluetooth'
770 except error.AutoservRunError:
771 # test exited with a return code 1 meaning the directory did not
772 # exist.
773 return None
774
775
776 def get_labels(self):
777 """Return a list of labels for this given host.
778
779 This is the main way to retrieve all the automatic labels for a host
780 as it will run through all the currently implemented label functions.
781 """
782 labels = []
Richard Barnette82c35912012-11-20 10:09:10 -0800783 for label_function in self._LABEL_FUNCTIONS:
Simran Basic6f1f7a2012-10-16 10:47:46 -0700784 label = label_function(self)
785 if label:
786 labels.append(label)
787 return labels