blob: ea6cdd7b45ce2c66ebed8ac364c2e443a10de049 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
J. Richard Barnette1d78b012012-05-15 13:56:30 -07005import logging
Simran Basid5e5e272012-09-24 15:23:59 -07006import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07008import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -07009import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070010
J. Richard Barnette45e93de2012-04-11 17:24:15 -070011from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080012from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette82c35912012-11-20 10:09:10 -080015from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070016from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070017from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070018from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070019from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070020from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080021from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070022
# Importing frontend.afe.models requires a full Autotest
# installation (with the Django modules), not just the source
# repository.  Most developers won't have the full installation, so
# the imports below will fail for them.
#
# The fix is to catch exceptions raised while importing, and set
# `models` to `None` on failure.  This has the side effect that
# SiteHost._get_board_from_afe() will fail:  That will manifest as
# failures during Repair jobs leaving the DUT as "Repair Failed".
# In practice, you can't test Repair jobs without a full
# installation, so that kind of failure isn't expected.
try:
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:
    # Kept broader than ImportError on purpose, since setting up the
    # Django environment may fail in other ways.  A bare `except:` is
    # avoided so that KeyboardInterrupt and SystemExit still propagate.
    models = None
39
Simran Basid5e5e272012-09-24 15:23:59 -070040
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to talk to a Chrome OS host.

    Overrides the default make_ssh_command with options tuned for
    Chrome OS.  The tuned options are hard-coded in the returned command:

    - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

    - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds.  In conjunction with ServerAliveCountMax ensures that
      if the connection dies, Autotest will bail out quickly.  Originally
      tried 60 secs, but saw frequent job ABORTS where the test completed
      successfully.

    - ServerAliveCountMax=3; consistency with remote_access.sh.

    - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

    - UserKnownHostsFile=/dev/null; we don't care about the keys.  Host
      keys change with every new installation, don't waste memory/space
      saving them.

    - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user Login name, passed to `ssh -l`.
    @param port Port number, passed to `ssh -p`.
    @param opts Extra ssh command line options, placed ahead of the
                `-o` settings.
    @param hosts_file Accepted but not used by this override.
    @param connect_timeout Accepted but not used by this override.
    @param alive_interval Accepted but not used by this override.

    @returns The ssh command as a string; the target hostname is not
             included.
    """
    fragments = (
        '/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no',
        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes',
        ' -o ConnectTimeout=30 -o ServerAliveInterval=180',
        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4',
        ' -o Protocol=2 -l %s -p %d',
    )
    command_template = ''.join(fragments)
    return command_template % (opts, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070070
71
def add_function_to_list(functions_list):
    """Return a decorator that registers functions in `functions_list`.

    The decorated function is appended to the list at decoration time
    and is returned unchanged.

    @param functions_list List that collects the decorated functions.

    @returns A decorator taking and returning a single function.
    """
    def register(decorated):
        functions_list.append(decorated)
        return decorated

    return register
78
79
class SiteHost(remote.RemoteHost):
    """Chromium OS specific subclass of Host."""

    _parser = autoserv_parser.autoserv_parser

    # Time to wait for new kernel to be marked successful after
    # auto update.
    _KERNEL_UPDATE_TIMEOUT = 120

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT:  Time to allow for resume after suspend, plus
    #   time to restart the network.
    # BOOT_TIMEOUT:  Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT:  Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # SHUTDOWN_TIMEOUT:  Time to allow for shut down.
    # REBOOT_TIMEOUT:  Combination of shutdown and reboot times.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 5
    BOOT_TIMEOUT = 45
    USB_BOOT_TIMEOUT = 150
    SHUTDOWN_TIMEOUT = 5
    REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT


    # Boards for which repair_full() may attempt a repair by RPM power
    # cycle; read as a comma separated list from the global config.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    # Number of failed boots after which _powercycle_to_repair() gives up.
    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched by machine_install() after a successful update.
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    # Hostname pattern matched by check_for_rpm_support().
    _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                           'host[0-9]+')
    # File names and search root used by has_lightsensor().
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # Label functions registered with @add_function_to_list; each is
    # called by get_labels().
    _LABEL_FUNCTIONS = []
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700129
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800130
J. Richard Barnette964fba02012-10-24 17:34:29 -0700131 @staticmethod
132 def get_servo_arguments(arglist):
133 servo_args = {}
134 for arg in ('servo_host', 'servo_port'):
135 if arg in arglist:
136 servo_args[arg] = arglist[arg]
137 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700138
J. Richard Barnette964fba02012-10-24 17:34:29 -0700139
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    For creating the host servo object, there are three
    possibilities:  First, if the host is a lab system known to
    have a servo board, we connect to that servo unconditionally.
    Second, if we're called from a control file that requires
    servo features for testing, it will pass settings for
    `servo_host`, `servo_port`, or both.  If neither of these
    cases apply, `self.servo` will be `None`.

    @param hostname Name of the host; also used to look up a lab servo.
    @param servo_args Keyword arguments for `servo.Servo()`, or `None`
                      when the caller requested no servo.
    """
    super(SiteHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'

    # Maps a remote port to the (cleanup, tunnel process) pair recorded
    # by xmlrpc_connect().
    self._xmlrpc_proxy_map = {}

    self.servo = servo.Servo.get_lab_servo(hostname)
    if self.servo or servo_args is None:
        return
    # No lab servo was found, but the caller supplied servo settings.
    self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700163
164
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False):
    """Update the host with autoupdater and verify the new image booted.

    If `run_update()` reports that an update was applied, the host is
    rebooted and the method checks that the installed version is correct
    and that the previously inactive kernel is now active.  For hosts
    with Chrome OS firmware it also waits for the new kernel to be marked
    successful.  Old Autotest directories are removed in every case.

    @param update_url URL of the update; when not given, the `image`
                      option of the autoserv parser is used instead.
    @param force_update Passed through to `updater.run_update()`.
    @param local_devserver Passed through to `ChromiumOSUpdater`.

    @exception ChromiumOSError No update URL is available, or one of
                               the post-update checks failed.
    """
    if not update_url and self._parser.options.image:
        update_url = self._parser.options.image
    elif not update_url:
        raise autoupdater.ChromiumOSError(
            'Update failed. No update URL provided.')

    # Attempt to update the system.
    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    if updater.run_update(force_update):
        # Figure out active and inactive kernel.
        active_kernel, inactive_kernel = updater.get_kernel_state()

        # Ensure inactive kernel has higher priority than active.
        if (updater.get_kernel_priority(inactive_kernel)
                < updater.get_kernel_priority(active_kernel)):
            raise autoupdater.ChromiumOSError(
                'Update failed. The priority of the inactive kernel'
                ' partition is less than that of the active kernel'
                ' partition.')

        # Dump the update engine log into the job output before the
        # reboot.
        update_engine_log = '/var/log/update_engine.log'
        logging.info('Dumping %s', update_engine_log)
        self.run('cat %s' % update_engine_log)
        # Updater has returned successfully; reboot the host.
        self.reboot(timeout=60, wait=True)
        # Touch the lab machine file to leave a marker that distinguishes
        # this image from other test images.
        self.run('touch %s' % self._LAB_MACHINE_FILE)

        # Following the reboot, verify the correct version.
        updater.check_version()

        # Figure out newly active kernel.
        new_active_kernel, _ = updater.get_kernel_state()

        # Ensure that previously inactive kernel is now the active kernel.
        if new_active_kernel != inactive_kernel:
            raise autoupdater.ChromiumOSError(
                'Update failed. New kernel partition is not active after'
                ' boot.')

        host_attributes = site_host_attributes.HostAttributes(self.hostname)
        if host_attributes.has_chromeos_firmware:
            # Wait until tries == 0 and success, or until timeout.
            utils.poll_for_condition(
                lambda: (updater.get_kernel_tries(new_active_kernel) == 0
                         and updater.get_kernel_success(new_active_kernel)),
                exception=autoupdater.ChromiumOSError(
                    'Update failed. Timed out waiting for system to mark'
                    ' new kernel as successful.'),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)

    # Clean up any old autotest directories which may be lying around.
    for path in global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list):
        self.run('rm -rf ' + path)
224
225
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern.

    Note that when the AFE models could not be imported, the
    module-level `models` is `None` and this method raises
    AttributeError.

    @returns board from label, or `None`.
    """
    host_model = models.Host.objects.get(hostname=self.hostname)
    # A list comprehension (rather than filter()) guarantees a real list,
    # so the length checks below do not depend on filter() returning one.
    board_labels = [label for label in host_model.labels.all()
                    if label.name.startswith('board:')]
    if len(board_labels) == 1:
        return board_labels[0].name.split(':', 1)[1]
    if not board_labels:
        logging.error('Host %s does not have a board label.',
                      self.hostname)
    else:
        logging.error('Host %s has multiple board labels.',
                      self.hostname)
    return None
248
249
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power cycled, then given BOOT_TIMEOUT to come up.  If
    it is not up after the first power cycle, we rely on auto fallback
    to the last good install by power cycling again, giving up after
    _MAX_POWER_CYCLE_ATTEMPTS failed boots.

    @exception AutoservError The host did not come back online within
                             the allowed number of power cycles.
    """
    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    self.power_cycle()
    while True:
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservError('Powercycled host %s %d times; '
                                      'device did not come back online.' %
                                      (self.hostname, failures))
        self.power_cycle()

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
272
273
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    Repair follows this sequence:
      1. If the DUT passes `self.verify()`, do nothing.
      2. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.
    """
    try:
        self.verify()
    except Exception:
        # Only ordinary failures trigger a repair attempt; `Exception`
        # (rather than a bare `except:`) lets KeyboardInterrupt and
        # SystemExit abort immediately instead of power cycling the DUT.
        host_board = self._get_board_from_afe()
        if (host_board is None or not self.has_power() or
                host_board not in self._RPM_RECOVERY_BOARDS):
            # No way to repair this host; report the verify() failure.
            raise
        self._powercycle_to_repair()
        self.verify()
300
301
def close(self):
    """Close the host, then disconnect every XMLRPC proxy still open."""
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
305
306
def cleanup(self):
    """Clean up the host by restarting the UI instead of rebooting.

    Removes the `CLEANUP_LOGS_PAUSED_FILE` marker, clears the login
    prompt state, restarts the UI and waits for the login prompt.  If
    any of those steps raises `AutoservRunError`, falls back to the
    parent class cleanup.
    """
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_wait_for_login_prompt')
    except error.AutoservRunError:
        # logging.warn() is a deprecated alias; use logging.warning().
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700321
322
Simran Basi154f5582012-10-23 16:27:11 -0700323 # TODO (sbasi) crosbug.com/35656
324 # Renamed the sitehost cleanup method so we don't go down this pathway.
325 # def cleanup(self):
def cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Runs the parent class cleanup and then, for hosts powered through
    an RPM, turns the power on.  A failure reported by the RPM
    infrastructure is logged but does not fail the cleanup.
    """
    super(SiteHost, self).cleanup()
    if self.has_power():
        try:
            self.power_on()
        # The exception must be qualified with its module: the bare name
        # `RemotePowerException` is not defined in this file, so the
        # handler itself would raise NameError.
        except rpm_client.RemotePowerException:
            # If cleanup has completed but there was an issue with the RPM
            # Infrastructure, log an error message rather than fail cleanup
            logging.error('Failed to turn Power On for this host after '
                          'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700337
338
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds.  This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    Defaults are supplied for `reboot_cmd` and `fastsync` only when the
    caller did not pass them; all arguments are then forwarded to the
    parent class.

    @param dargs Keyword arguments for the parent class `reboot()`.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
354
355
def verify_software(self):
    """Ensure the stateful partition has space for Autotest and updates.

    Similar to what is done by AbstractSSH, except instead of checking the
    Autotest installation path, just check the stateful partition.

    Checking the stateful partition is preferable in case it has been
    wiped, resulting in an Autotest installation path which doesn't exist
    and isn't writable.  We still want to pass verify in this state since
    the partition will be recovered with the next install.

    The required space is the `gb_diskspace_required` setting of the
    SERVER config section, defaulting to 20.
    """
    super(SiteHost, self).verify_software()
    required_gb = global_config.global_config.get_config_value(
        'SERVER', 'gb_diskspace_required', type=int, default=20)
    self.check_diskspace('/mnt/stateful_partition', required_gb)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700373
374
def xmlrpc_connect(self, command, port, cleanup=None):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Any connection previously made for `port` is disconnected first.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param cleanup Pattern that `xmlrpc_disconnect()` hands to
                   `pkill -f` to stop the server; when empty or
                   `None`, no remote process is killed on disconnect.

    @returns An `xmlrpclib.ServerProxy` that reaches the server through
             a local ssh tunnel.
    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except Exception:
        # The tunnel has not been recorded in _xmlrpc_proxy_map yet, so
        # nothing else would ever terminate it; stop it here before
        # passing the failure on to the caller.
        tunnel_proc.terminate()
        raise
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (cleanup, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    return xmlrpclib.ServerProxy(rpc_url, allow_none=True)
423
424
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned XMLRPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    try:
        remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]
    except KeyError:
        # Never connected (or already disconnected): nothing to do.
        return

    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    tunnel_running = tunnel_proc.poll() is None
    if tunnel_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
465
466
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports."""
    # xmlrpc_disconnect() deletes entries from the map, so iterate over
    # a snapshot of the ports rather than over the dictionary itself.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
471
472
def _ping_is_up(self):
    """Ping the host once, and return whether it responded.

    @returns True if `utils.ping()` returned 0 for the single attempt,
             False otherwise.
    """
    return utils.ping(self.hostname, tries=1, deadline=1) == 0
476
477
def _ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.

    @returns True as soon as a ping goes unanswered; False if the host
             still answered the final ping made after the deadline.
    """

    # This function is a slightly faster version of wait_down().
    #
    # In AbstractSSHHost.wait_down(), `ssh` is used to determine
    # whether the host is down.  In some situations (mine, at
    # least), `ssh` can take over a minute to determine that the
    # host is down.  The `ping` command answers the question
    # faster, so we use that here instead.
    #
    # There is no equivalent for wait_up(), because a target that
    # answers to `ping` won't necessarily respond to `ssh`.
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if not self._ping_is_up():
            return True

    # If the timeout is short relative to the run time of
    # _ping_is_up(), we might be prone to false failures for
    # lack of checking frequently enough.  To be safe, we make
    # one last check _after_ the deadline.
    return not self._ping_is_up()
505
506
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_down = self._ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' %
        self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700533
534
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode.  A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param[in] old_boot_id A boot id value obtained before the
                           target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
            self.RESUME_TIMEOUT)

    # The host is back; a changed boot id means it rebooted rather
    # than resumed.
    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        return
    raise error.TestFail(
        'client rebooted, but sleep was expected'
        ' (old boot %s, new boot %s)'
        % (old_boot_id, new_boot_id))
565
566
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The test for "has shut down" can't distinguish a system that
    is merely asleep; to confirm that the unit was down, it is
    necessary to force boot, and then call test_wait_for_boot().

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    went_down = self._ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
        'client failed to shut down after %d seconds' %
        self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700592
593
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to shutting down.  A
    `TestFail` exception is raised if the boot id does not
    change.  The boot id test is omitted if `old_boot_id` is not
    specified.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param[in] old_boot_id A boot id value obtained before the
                           shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
            'client failed to reboot after %d seconds' %
            self.REBOOT_TIMEOUT)

    # The boot id is only fetched when the caller supplied one to
    # compare against.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
            'client is back up, but did not reboot'
            ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700623
624
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    The hostname is matched from its start against
    `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname Name of the host to check.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab.  If it does follow the
            format, it returns a regular expression MatchObject instead.
    """
    rpm_hostname_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -0700634
635
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return The result of `check_for_rpm_support()` for this host's
            name: a (truthy) regular expression MatchObject if the name
            follows the defined naming format, `None` otherwise.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
643
644
def power_off(self):
    """Request the 'OFF' power state for this host through rpm_client."""
    rpm_client.set_power(self.hostname, 'OFF')
Simran Basid5e5e272012-09-24 15:23:59 -0700647
648
def power_on(self):
    """Request the 'ON' power state for this host through rpm_client."""
    rpm_client.set_power(self.hostname, 'ON')
Simran Basid5e5e272012-09-24 15:23:59 -0700651
652
def power_cycle(self):
    """Request a power 'CYCLE' for this host through rpm_client."""
    rpm_client.set_power(self.hostname, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700655
656
def get_platform(self):
    """Determine the correct platform label for this host.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # Extract fwid value and use the leading part as the platform id.
    # fwid generally follow the format of {platform}.{firmware version}
    # Example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0]
    # Newer platforms start with 'Google_' while the older ones do not.
    return platform_id.lower().replace('google_', '')
670
671
@add_function_to_list(_LABEL_FUNCTIONS)
def get_board(self):
    """Determine the correct board label for this host.

    The board is read from CHROMEOS_RELEASE_BOARD in /etc/lsb-release
    on the host.

    @returns a string of the form 'board:<board>' representing this
             host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    # Devices in the lab generally have the correct board name but our own
    # development devices have {board_name}-signed-{key_type}.  The board
    # name may also begin with 'x86-' which we need to keep.
    name_parts = board.split('-')
    kept_parts = 2 if 'x86' in board else 1
    return 'board:%s' % '-'.join(name_parts[:kept_parts])
687
688
@add_function_to_list(_LABEL_FUNCTIONS)
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    sensor_files = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, sensor_files)
    try:
        # Run the search cmd following the symlinks.  Stderr_tee is set to
        # None as there can be a symlink loop, but the command will still
        # execute correctly with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the possible
        # lightsensor files existed.
        return None
    else:
        return 'lightsensor'
708
709
@add_function_to_list(_LABEL_FUNCTIONS)
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    else:
        # test exited with a return code of 0.
        return 'bluetooth'
725
726
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host
    as it will run through all the currently implemented label functions.

    @returns a list holding the truthy result of each function in
             `_LABEL_FUNCTIONS`, in registration order.
    """
    candidates = (label_function(self)
                  for label_function in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]