blob: ccdd479ca5b7d4e9d07431d15e70f5dbe0151c4e [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
J. Richard Barnette1d78b012012-05-15 13:56:30 -07005import logging
Simran Basid5e5e272012-09-24 15:23:59 -07006import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07008import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -07009import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070010
J. Richard Barnette45e93de2012-04-11 17:24:15 -070011from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080012from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080015from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette82c35912012-11-20 10:09:10 -080016from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070018from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070020from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080022from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070023
Richard Barnette82c35912012-11-20 10:09:10 -080024# Importing frontend.afe.models requires a full Autotest
25# installation (with the Django modules), not just the source
26# repository. Most developers won't have the full installation, so
27# the imports below will fail for them.
28#
29# The fix is to catch import exceptions, and set `models` to `None`
30# on failure. This has the side effect that
31# SiteHost._get_board_from_afe() will fail: That will manifest as
32# failures during Repair jobs leaving the DUT as "Repair Failed".
33# In practice, you can't test Repair jobs without a full
34# installation, so that kind of failure isn't expected.
try:
    # Both imports are needed only to obtain `models`; the first is
    # imported purely for its side effects.  Any ordinary failure is
    # treated as "no full Autotest installation".  We catch
    # `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate.
    # pylint: disable=W0611,W0703
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:
    models = None
41
Simran Basid5e5e272012-09-24 15:23:59 -070042
def _make_servo_hostname(hostname):
    """Return the servo host name that corresponds to `hostname`.

    The result is `hostname` with '-servo' appended to its first
    dot-separated component; all later components are unchanged.

    @param hostname Host name of the DUT.
    @return the derived servo host name.
    """
    first_label, dot, remainder = hostname.partition('.')
    return first_label + '-servo' + dot + remainder
47
48
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    This is best-effort: if the derived servo host is not in the lab
    zone, or if constructing the Servo object fails, `None` is
    returned instead of raising.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None`.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host,
                           target_host=target_hostname)
    except Exception as e:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # The failure is logged so that it isn't completely silent;
        # `Exception` (not a bare `except:`) lets KeyboardInterrupt
        # and SystemExit through.
        logging.warning('Ignoring failure to connect to servo host '
                        '%s: %s', servo_host, e)
        return None
72
73
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command prefix used for Chrome OS hosts.

    This overrides the default make_ssh_command with fixed options
    tuned for Chrome OS:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null; we
        don't care about the keys.  Host keys change with every new
        installation, so there is no point in saving them.
      - BatchMode=yes.
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.
      - ServerAliveInterval=180; causes SSH to ping the connection
        every 180 seconds.  In conjunction with ServerAliveCountMax
        this ensures that if the connection dies, Autotest will bail
        out quickly.  60 seconds was tried originally, but caused
        frequent job ABORTS where the test completed successfully.
      - ServerAliveCountMax=3; consistency with remote_access.sh.
      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.
      - Protocol=2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return the ssh command line, without the target host name.
    """
    ssh_settings = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ' '.join('-o ' + setting for setting in ssh_settings)
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_flags, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700110
111
def add_function_to_list(functions_list):
    """Return a decorator that records functions in `functions_list`.

    The returned decorator appends the function it is applied to onto
    `functions_list` and hands the function back unmodified.

    @param functions_list list to which the decorated function will
                          be added.
    @return the registering decorator.
    """
    # pylint: disable=C0111
    def register(target):
        functions_list.append(target)
        return target

    return register
123
124
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700125class SiteHost(remote.RemoteHost):
126 """Chromium OS specific subclass of Host."""
127
128 _parser = autoserv_parser.autoserv_parser
129
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800130 # Time to wait for new kernel to be marked successful after
131 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700132 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700133
Richard Barnette03a0c132012-11-05 12:40:35 -0800134 # Timeout values (in seconds) associated with various Chrome OS
135 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700136 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800137 # In general, a good rule of thumb is that the timeout can be up
138 # to twice the typical measured value on the slowest platform.
139 # The times here have not necessarily been empirically tested to
140 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700141 #
142 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800143 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
144 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700145 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800146 # other things, this must account for the 30 second dev-mode
147 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700148 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800149 # including the 30 second dev-mode delay and time to start the
150 # network.
151 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700152 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
Richard Barnette03a0c132012-11-05 12:40:35 -0800153 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700154
155 SLEEP_TIMEOUT = 2
156 RESUME_TIMEOUT = 5
157 BOOT_TIMEOUT = 45
158 USB_BOOT_TIMEOUT = 150
159 SHUTDOWN_TIMEOUT = 5
160 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
Richard Barnette03a0c132012-11-05 12:40:35 -0800161 _INSTALL_TIMEOUT = 240
162
163 _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
164 '%(board)s_test_image.bin')
165
166 # TODO(jrbarnette): Servo repair is restricted to x86-alex,
167 # because the existing servo client code won't work on other
168 # boards. http://crosbug.com/36973
169 _SERVO_REPAIR_WHITELIST = [ 'x86-alex' ]
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800170
171
Richard Barnette82c35912012-11-20 10:09:10 -0800172 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
173 'rpm_recovery_boards', type=str).split(',')
174
175 _MAX_POWER_CYCLE_ATTEMPTS = 6
176 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
177 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
178 'host[0-9]+')
179 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
180 'in_illuminance0_raw',
181 'illuminance0_input']
182 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
183 _LABEL_FUNCTIONS = []
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700184
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800185
@staticmethod
def get_servo_arguments(args_dict):
    """Extract servo options from `args_dict` and return the result.

    Only the 'servo_host' and 'servo_port' entries are copied; keys
    missing from `args_dict` are simply left out of the result.  The
    intent is to provide standard argument processing from
    run_remote_tests for tests that require a servo to operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.
    @return a new dictionary holding just the servo arguments.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700211
J. Richard Barnette964fba02012-10-24 17:34:29 -0700212
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    For creating the host servo object, there are three
    possibilities:  First, if the host is a lab system known to
    have a servo board, we connect to that servo unconditionally.
    Second, if we're called from a control file that requires
    servo features for testing, it will pass settings for
    `servo_host`, `servo_port`, or both.  If neither of these
    cases apply, `self.servo` will be `None`.

    @param hostname Host name of the DUT.
    @param servo_args Dictionary of keyword arguments for
                      constructing a `servo.Servo`, or `None`.
    @param args Positional arguments passed on to the superclass.
    @param dargs Keyword arguments passed on to the superclass.
    """
    super(SiteHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env holds environment variable settings exported for
    # commands run on the host.  LIBC_FATAL_STDERR_ can be useful
    # for diagnosing certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._xmlrpc_proxy_map = {}
    # A lab servo takes precedence; caller-provided settings are
    # used only when no lab servo was obtained.
    self.servo = _get_lab_servo(hostname)
    if not (self.servo or servo_args is None):
        self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700236
237
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False):
    """Install a Chrome OS image on the host using the auto-updater.

    The host is rebooted, the update is run, and if the updater
    reports that an update was applied, the host is rebooted again
    and the new kernel and version are checked.  Old client autotest
    directories are removed at the end in either case.

    @param update_url URL of the update image; when empty, the
                      `image` option of the autoserv parser is used.
    @param force_update Passed through to the updater's
                        `run_update()`.
    @param local_devserver Passed through to the updater.

    @exception ChromiumOSError No update URL is available, or one of
                               the post-update checks failed.
    """
    if not update_url:
        update_url = self._parser.options.image
        if not update_url:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    # In case the system is in a bad state, we always reboot the
    # machine before machine_install.
    self.reboot(timeout=60, wait=True)

    # Attempt to update the system.
    updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    if updater.run_update(force_update):
        # Figure out active and inactive kernel.
        active_kernel, inactive_kernel = updater.get_kernel_state()

        # Ensure inactive kernel has higher priority than active.
        inactive_priority = updater.get_kernel_priority(inactive_kernel)
        active_priority = updater.get_kernel_priority(active_kernel)
        if inactive_priority < active_priority:
            raise autoupdater.ChromiumOSError(
                    'Update failed. The priority of the inactive kernel'
                    ' partition is less than that of the active kernel'
                    ' partition.')

        update_engine_log = '/var/log/update_engine.log'
        logging.info('Dumping %s', update_engine_log)
        self.run('cat %s' % update_engine_log)
        # Updater has returned successfully; reboot the host.
        self.reboot(timeout=60, wait=True)
        # Touch the lab machine file to leave a marker that
        # distinguishes this image from other test images.
        self.run('touch %s' % self._LAB_MACHINE_FILE)

        # Following the reboot, verify the correct version.
        updater.check_version()

        # Figure out newly active kernel.
        new_active_kernel, _ = updater.get_kernel_state()

        # Ensure that previously inactive kernel is now the active
        # kernel.
        if new_active_kernel != inactive_kernel:
            raise autoupdater.ChromiumOSError(
                    'Update failed. New kernel partition is not active after'
                    ' boot.')

        host_attributes = site_host_attributes.HostAttributes(
                self.hostname)
        if host_attributes.has_chromeos_firmware:
            def kernel_marked_successful():
                """Return whether tries == 0 and success is set."""
                return (updater.get_kernel_tries(new_active_kernel) == 0
                        and updater.get_kernel_success(new_active_kernel))

            # Wait until tries == 0 and success, or until timeout.
            utils.poll_for_condition(
                    kernel_marked_successful,
                    exception=autoupdater.ChromiumOSError(
                            'Update failed. Timed out waiting for system to mark'
                            ' new kernel as successful.'),
                    timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)

    # Clean up any old autotest directories which may be lying around.
    stale_paths = global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list)
    for path in stale_paths:
        self.run('rm -rf ' + path)
301
302
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern;
    an error is logged in that case.

    @returns board from label, or `None`.
    """
    host_model = models.Host.objects.get(hostname=self.hostname)
    # A list comprehension (rather than `filter()`) guarantees a real
    # list, so the length checks below work whether or not `filter()`
    # returns a list.
    board_names = [label.name.split(':', 1)[1]
                   for label in host_model.labels.all()
                   if label.name.startswith('board:')]
    if len(board_names) == 1:
        return board_names[0]
    if not board_names:
        logging.error('Host %s does not have a board label.',
                      self.hostname)
    else:
        logging.error('Host %s has multiple board labels.',
                      self.hostname)
    return None
325
326
def _servo_repair(self, board):
    """Attempt to repair this host using an attached Servo.

    Re-install the OS on the DUT by 1) installing a test image
    on a USB storage device attached to the Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image.  Afterwards the DUT is power-cycled
    through the servo and must come back up from the installed image.

    @param board Board name used to locate the devserver and to
                 build the test image URL.

    @exception AutoservError The DUT did not come up within the
                             allowed time after booting from USB or
                             after the final reboot.
    """
    devserver_url = dev_server.ImageServer.devserver_url_for_servo(board)
    image_path = self._DEFAULT_SERVO_URL_FORMAT % {'board': board}
    self.servo.install_recovery_image(devserver_url + image_path)
    if not self.wait_up(timeout=self.USB_BOOT_TIMEOUT):
        raise error.AutoservError('DUT failed to boot from USB'
                                  ' after %d seconds' %
                                  self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)

    # Power the DUT off, switch the USB mux setting, then power the
    # DUT back on.
    self.servo.power_long_press()
    self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
    self.servo.power_short_press()
    if not self.wait_up(timeout=self.BOOT_TIMEOUT):
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
353
354
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power-cycled and then given `BOOT_TIMEOUT` seconds to
    come up.  If the host is not up/repaired after the first
    powercycle we utilize auto fallback to the last good install by
    powercycling again, up to `_MAX_POWER_CYCLE_ATTEMPTS` cycles in
    total.

    @exception AutoservError The host did not come back online after
                             the maximum number of power cycles.
    """
    logging.info('Attempting repair via RPM powercycle.')
    failed_cycles = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failed_cycles += 1
        if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservError('Powercycled host %s %d times; '
                                      'device did not come back online.' %
                                      (self.hostname, failed_cycles))

    if failed_cycles:
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
    else:
        logging.info('Powercycling was successful first time.')
377
378
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    Repair follows this sequence:
      1. If the DUT passes `self.verify()`, do nothing.
      2. If the DUT has a servo and its board is in
         `_SERVO_REPAIR_WHITELIST`, try to repair by re-installing
         via servo.
      3. Otherwise, if the DUT can be power-cycled via RPM and its
         board is in `_RPM_RECOVERY_BOARDS`, try to repair by
         power-cycling.
      4. Otherwise, re-raise the original verify failure.

    The verify failure is also re-raised when the host's board can't
    be determined from the AFE.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.
    """
    try:
        self.verify()
    except Exception:  # pylint: disable=W0703
        # Any verify failure triggers repair.  `Exception` rather than
        # a bare `except:` keeps KeyboardInterrupt and SystemExit from
        # starting a repair instead of stopping the job.
        host_board = self._get_board_from_afe()
        if host_board is None:
            logging.error('host %s has no board; failing repair',
                          self.hostname)
            raise
        if (self.servo and
                host_board in self._SERVO_REPAIR_WHITELIST):
            self._servo_repair(host_board)
        elif (self.has_power() and
                host_board in self._RPM_RECOVERY_BOARDS):
            self._powercycle_to_repair()
        else:
            logging.error('host %s has no servo and no RPM control; '
                          'failing repair', self.hostname)
            raise
        self.verify()
415
416
def close(self):
    """Close the host, then disconnect all XMLRPC proxy ports."""
    parent = super(SiteHost, self)
    parent.close()
    self.xmlrpc_disconnect_all()
420
421
def cleanup(self):
    """Clean up the host after a test by restarting the UI.

    Removes the file named by `constants.CLEANUP_LOGS_PAUSED_FILE`,
    then clears the login prompt state, restarts the UI, and waits
    for the login prompt.  If running the client-side helpers fails
    with `AutotestRunError`, falls back to the parent class cleanup.
    """
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_wait_for_login_prompt')
    except error.AutotestRunError:
        # `logging.warn` is a deprecated alias; use `logging.warning`.
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700436
437
Simran Basi154f5582012-10-23 16:27:11 -0700438 # TODO (sbasi) crosbug.com/35656
439 # Renamed the sitehost cleanup method so we don't go down this pathway.
440 # def cleanup(self):
def cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Runs the parent class cleanup, then, for hosts with power
    control, turns power on.  A failure reported by the RPM
    infrastructure is logged rather than raised.
    """
    super(SiteHost, self).cleanup()
    if not self.has_power():
        return
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # Cleanup itself has completed; an issue with the RPM
        # Infrastructure is logged as an error rather than failing
        # cleanup.
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700452
453
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds.  This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.

    @param dargs Keyword arguments for the parent class `reboot()`.
                 `reboot_cmd` and `fastsync` are given Chrome OS
                 defaults when the caller doesn't supply them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before
    # reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
469
470
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. update_engine answers a simple status request over DBus.

    """
    super(SiteHost, self).verify_software()
    # Required free space comes from the SERVER section of the global
    # config, with a default of 20.
    required_gb = global_config.global_config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=int, default=20)
    self.check_diskspace('/mnt/stateful_partition', required_gb)
    self.run('update_engine_client --status')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700488
489
def xmlrpc_connect(self, command, port, command_name=None):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Any connection previously made for `port` is disconnected first.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @return an `xmlrpclib.ServerProxy` for the local end of the
            tunnel.
    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    forward_spec = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (make_ssh_command(opts=forward_spec),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    server_start = self.run(remote_cmd)
    remote_pid = server_start.stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Remember what xmlrpc_disconnect() needs to tear this down.
    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    return xmlrpclib.ServerProxy('http://localhost:%d' % local_port,
                                 allow_none=True)
540
541
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned XMLRPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    if port not in self._xmlrpc_proxy_map:
        return
    remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]
    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    tunnel_still_running = tunnel_proc.poll() is None
    if tunnel_still_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
582
583
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports."""
    # xmlrpc_disconnect() deletes entries from the map, so iterate
    # over a snapshot of the ports rather than over the map itself;
    # iterating a live `keys()` view while deleting raises
    # RuntimeError.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
588
589
def _ping_is_up(self):
    """Ping the host once, and return whether it responded.

    @return `True` if `utils.ping()` reported status 0 for a single
            ping with a 1 second deadline, `False` otherwise.
    """
    ping_status = utils.ping(self.hostname, tries=1, deadline=1)
    return ping_status == 0
593
594
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return `True` if the host stopped answering `ping`, `False` if
            it was still answering after the timeout.
    """

    # This function is a slightly faster version of wait_down().
    #
    # In AbstractSSHHost.wait_down(), `ssh` is used to determine
    # whether the host is down.  In some situations, `ssh` can take
    # over a minute to determine that the host is down.  The `ping`
    # command answers the question faster, so we use that here
    # instead.
    #
    # There is no equivalent for wait_up(), because a target that
    # answers to `ping` won't necessarily respond to `ssh`.
    deadline = time.time() + timeout
    while time.time() <= deadline:
        if self._ping_is_up():
            continue
        return True

    # If the timeout is short relative to the run time of
    # _ping_is_up(), we might be prone to false failures for
    # lack of checking frequently enough.  To be safe, we make
    # one last check _after_ the deadline.
    still_up = self._ping_is_up()
    return not still_up
622
623
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700650
651
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode.  A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                           target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host is back; a changed boot id means it rebooted instead
    # of resuming.
    new_boot_id = self.get_boot_id()
    if new_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, new_boot_id))
682
683
def test_wait_for_shutdown(self):
    """Wait until the client has shut down.

    The check used here cannot tell a system that is shut down
    apart from one that is merely asleep.  To confirm that the
    unit really was down, the caller has to force a boot and then
    call test_wait_for_boot().

    A test is expected to use this function in a sequence like
    the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time (`SHUTDOWN_TIMEOUT`).
    """
    shutdown_timeout = self.SHUTDOWN_TIMEOUT
    if self.ping_wait_down(timeout=shutdown_timeout):
        # The host went down in time; nothing more to check.
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            shutdown_timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700709
710
def test_wait_for_boot(self, old_boot_id=None):
    """Wait until the client has booted from cold power.

    `old_boot_id` should be the value that `get_boot_id()`
    returned before the host was shut down.  If the boot id seen
    after the host comes back is unchanged, a `TestFail`
    exception is raised.  When `old_boot_id` is omitted (or is
    otherwise false), the boot id test is skipped.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time (`REBOOT_TIMEOUT`).
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # Only query the host's boot id when the caller gave us one to
    # compare against; an unchanged id means no reboot happened.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700740
741
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname follows the RPM-powered DUT naming format.

    The hostname is matched against `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname The hostname to check.

    @return None if this host does not follow the defined naming
            format for RPM powered DUT's in the lab.  If it does
            follow the format, a regular expression MatchObject is
            returned instead.
    """
    rpm_match = re.match(SiteHost._RPM_HOSTNAME_REGEX, hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -0700751
752
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    This simply applies `check_for_rpm_support()` to this host's
    hostname, so the result is meant to be used as a truth value.

    @return A regular expression MatchObject (true) if this host's
            name follows the defined naming format for RPM powered
            hosts, or None (false) if it does not.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
760
761
def power_off(self):
    """Ask the RPM client to set this host's power state to 'OFF'."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'OFF')
Simran Basid5e5e272012-09-24 15:23:59 -0700765
766
def power_on(self):
    """Ask the RPM client to set this host's power state to 'ON'."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'ON')
Simran Basid5e5e272012-09-24 15:23:59 -0700770
771
def power_cycle(self):
    """Ask the RPM client to cycle this host's power (OFF, then ON)."""
    target_host = self.hostname
    rpm_client.set_power(target_host, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700775
776
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id (fwid) reported
    by crossystem on the host.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z; everything
    # before the first '.' is used as the platform id.
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0]
    lowered = platform_id.lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return lowered.replace('google_', '')
790
791
@add_function_to_list(_LABEL_FUNCTIONS)
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from the CHROMEOS_RELEASE_BOARD entry
    of /etc/lsb-release on the host.

    @returns a string of the form 'board:<name>' representing this
             host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    # Devices in the lab generally have the correct board name but our
    # own development devices have {board_name}-signed-{key_type}, so
    # only the leading component is kept.  Board names containing 'x86'
    # (e.g. beginning with 'x86-') need their first two components.
    name_parts = board.split('-')
    kept_parts = 2 if 'x86' in board else 1
    return 'board:%s' % '-'.join(name_parts[:kept_parts])
807
808
@add_function_to_list(_LABEL_FUNCTIONS)
def has_lightsensor(self):
    """Determine the lightsensor label for this host.

    Searches `_LIGHTSENSOR_SEARCH_DIR` on the host (following
    symlinks, at most 4 levels deep) for any of the file names in
    `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # Run the search cmd following the symlinks. Stderr_tee is set to
        # None as there can be a symlink loop, but the command will still
        # execute correctly with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
828
829
@add_function_to_list(_LABEL_FUNCTIONS)
def has_bluetooth(self):
    """Determine the bluetooth label for this host.

    Checks on the host whether the directory
    /sys/class/bluetooth/hci0 exists.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did not
        # exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
845
846
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function registered in `_LABEL_FUNCTIONS` is called
    with this host, in order, and each true (non-empty) result is
    collected.

    @returns a list of the labels produced by the label functions;
             results that are None or otherwise false are left out.
    """
    # Call every label function first, in registration order, then
    # drop the results that did not yield a label.
    candidates = [label_function(self)
                  for label_function in self._LABEL_FUNCTIONS]
    return [label for label in candidates if label]