blob: 36daaad81e502a59bae613d12a121f79e46887bd [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
J. Richard Barnette1d78b012012-05-15 13:56:30 -07005import logging
Simran Basid5e5e272012-09-24 15:23:59 -07006import re
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07008import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -07009import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070010
J. Richard Barnette45e93de2012-04-11 17:24:15 -070011from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080012from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import global_config
J. Richard Barnette45e93de2012-04-11 17:24:15 -070014from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080015from autotest_lib.client.common_lib.cros import dev_server
Richard Barnette82c35912012-11-20 10:09:10 -080016from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070018from autotest_lib.server import autotest
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.server import site_host_attributes
J. Richard Barnette67ccb872012-04-19 16:34:56 -070020from autotest_lib.server.cros import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070021from autotest_lib.server.hosts import remote
Simran Basidcff4252012-11-20 16:13:20 -080022from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070023
# Importing frontend.afe.models requires a full Autotest
# installation (with the Django modules), not just the source
# repository.  Most developers won't have the full installation, so
# the imports below will fail for them.
#
# The fix is to catch import exceptions, and set `models` to `None`
# on failure.  This has the side effect that
# SiteHost._get_board_from_afe() will fail:  That will manifest as
# failures during Repair jobs leaving the DUT as "Repair Failed".
# In practice, you can't test Repair jobs without a full
# installation, so that kind of failure isn't expected.
#
# `Exception` is caught rather than using a bare `except:` so that
# KeyboardInterrupt and SystemExit raised while importing are not
# silently swallowed.
try:
    # pylint: disable=W0611
    from autotest_lib.frontend import setup_django_environment
    from autotest_lib.frontend.afe import models
except Exception:  # pylint: disable=W0703
    models = None
41
Simran Basid5e5e272012-09-24 15:23:59 -070042
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080043def _make_servo_hostname(hostname):
44 host_parts = hostname.split('.')
45 host_parts[0] = host_parts[0] + '-servo'
46 return '.'.join(host_parts)
47
48
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None`
            if the servo host is not in the lab zone or the Servo
            object could not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception as e:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # The failure is still logged so that it can be diagnosed,
        # and KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.debug('Unable to create Servo for %s: %s',
                      servo_host, e)
        return None
71
72
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection
      every 180 seconds.  In conjunction with ServerAliveCountMax
      ensures that if the connection dies, Autotest will bail out
      quickly.  Originally tried 60 secs, but saw frequent job ABORTS
      where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return The ssh command line, as a single string.
    """
    # pylint: disable=W0613
    ssh_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ' '.join(['-o ' + option for option in ssh_options])
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_flags, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700109
110
def add_function_to_list(functions_list):
    """Decorator used to group functions together into the provided list.

    The decorated function is appended to `functions_list` and is
    otherwise returned unchanged.

    @param functions_list list to which the decorated function will
                          be added.
    @return A decorator that records its argument in `functions_list`.
    """
    # pylint: disable=C0111
    def register(func):
        functions_list.append(func)
        return func
    return register
122
123
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700124class SiteHost(remote.RemoteHost):
125 """Chromium OS specific subclass of Host."""
126
127 _parser = autoserv_parser.autoserv_parser
128
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800129 # Time to wait for new kernel to be marked successful after
130 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700131 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700132
Richard Barnette03a0c132012-11-05 12:40:35 -0800133 # Timeout values (in seconds) associated with various Chrome OS
134 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700135 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800136 # In general, a good rule of thumb is that the timeout can be up
137 # to twice the typical measured value on the slowest platform.
138 # The times here have not necessarily been empirically tested to
139 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700140 #
141 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800142 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
143 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700144 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800145 # other things, this must account for the 30 second dev-mode
146 # screen delay and time to start the network,
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700147 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800148 # including the 30 second dev-mode delay and time to start the
149 # network,
150 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700151 # REBOOT_TIMEOUT: Combination of shutdown and reboot times.
Richard Barnette03a0c132012-11-05 12:40:35 -0800152 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700153
154 SLEEP_TIMEOUT = 2
155 RESUME_TIMEOUT = 5
156 BOOT_TIMEOUT = 45
157 USB_BOOT_TIMEOUT = 150
158 SHUTDOWN_TIMEOUT = 5
159 REBOOT_TIMEOUT = SHUTDOWN_TIMEOUT + BOOT_TIMEOUT
Richard Barnette03a0c132012-11-05 12:40:35 -0800160 _INSTALL_TIMEOUT = 240
161
162 _DEFAULT_SERVO_URL_FORMAT = ('/static/servo-images/'
163 '%(board)s_test_image.bin')
164
165 # TODO(jrbarnette): Servo repair is restricted to x86-alex,
166 # because the existing servo client code won't work on other
167 # boards. http://crosbug.com/36973
168 _SERVO_REPAIR_WHITELIST = [ 'x86-alex' ]
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800169
170
Richard Barnette82c35912012-11-20 10:09:10 -0800171 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
172 'rpm_recovery_boards', type=str).split(',')
173
174 _MAX_POWER_CYCLE_ATTEMPTS = 6
175 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
176 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
177 'host[0-9]+')
178 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
179 'in_illuminance0_raw',
180 'illuminance0_input']
181 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
182 _LABEL_FUNCTIONS = []
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700183
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800184
@staticmethod
def get_servo_arguments(args_dict):
    """Extract servo options from `args_dict` and return the result.

    Take the provided dictionary of argument options and return
    a subset that represent standard arguments needed to
    construct a servo object for a host.  The intent is to
    provide standard argument processing from run_remote_tests
    for tests that require a servo to operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.
    @return A new dictionary holding only the 'servo_host' and
      'servo_port' entries that are present in `args_dict`.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700210
J. Richard Barnette964fba02012-10-24 17:34:29 -0700211
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    For creating the host servo object, there are three
    possibilities:  First, if the host is a lab system known to
    have a servo board, we connect to that servo unconditionally.
    Second, if we're called from a control file that requires
    servo features for testing, it will pass settings for
    `servo_host`, `servo_port`, or both.  If neither of these
    cases apply, `self.servo` will be `None`.

    @param hostname Name of the host; passed to the superclass and
            used to look up a lab servo.
    @param servo_args Optional dictionary of keyword arguments for
            constructing a `servo.Servo` when no lab servo is found.
    """
    super(SiteHost, self)._initialize(hostname=hostname,
                                      *args, **dargs)
    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._xmlrpc_proxy_map = {}
    # A lab servo, when available, takes precedence over any
    # caller-supplied servo settings.
    self.servo = _get_lab_servo(hostname)
    if servo_args is not None and not self.servo:
        self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700235
236
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False):
    """Install a Chrome OS image on this host via the updater.

    The host is rebooted, the updater is run against `update_url`,
    and, if `run_update()` returns a true value, the host is
    rebooted again and the newly installed kernel is checked.
    Old Autotest client directories are removed at the end.

    @param update_url URL of the image to install.  When not
            provided, the image from the autoserv parser options
            is used instead.
    @param force_update Passed through to the updater's
            `run_update()`.
    @param local_devserver Passed through to the updater's
            constructor.

    @exception ChromiumOSError No update URL was available, or one
            of the post-update kernel checks failed.
    """
    if not update_url:
        update_url = self._parser.options.image
        if not update_url:
            raise autoupdater.ChromiumOSError(
                'Update failed. No update URL provided.')

    # In case the system is in a bad state, we always reboot the machine
    # before machine_install.
    self.reboot(timeout=60, wait=True)

    # Attempt to update the system.
    updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    if updater.run_update(force_update):
        # Figure out active and inactive kernel.
        active_kernel, inactive_kernel = updater.get_kernel_state()

        # Ensure inactive kernel has higher priority than active.
        inactive_priority = updater.get_kernel_priority(inactive_kernel)
        active_priority = updater.get_kernel_priority(active_kernel)
        if inactive_priority < active_priority:
            raise autoupdater.ChromiumOSError(
                'Update failed. The priority of the inactive kernel'
                ' partition is less than that of the active kernel'
                ' partition.')

        update_engine_log = '/var/log/update_engine.log'
        logging.info('Dumping %s', update_engine_log)
        self.run('cat %s' % update_engine_log)
        # Updater has returned successfully; reboot the host.
        self.reboot(timeout=60, wait=True)
        # Touch the lab machine file to leave a marker that distinguishes
        # this image from other test images.
        self.run('touch %s' % self._LAB_MACHINE_FILE)

        # Following the reboot, verify the correct version.
        updater.check_version()

        # Figure out newly active kernel.
        new_active_kernel, _ = updater.get_kernel_state()

        # Ensure that previously inactive kernel is now the active kernel.
        if new_active_kernel != inactive_kernel:
            raise autoupdater.ChromiumOSError(
                'Update failed. New kernel partition is not active after'
                ' boot.')

        host_attributes = site_host_attributes.HostAttributes(self.hostname)
        if host_attributes.has_chromeos_firmware:
            def _kernel_marked_successful():
                return (updater.get_kernel_tries(new_active_kernel) == 0
                        and updater.get_kernel_success(new_active_kernel))

            # Wait until tries == 0 and success, or until timeout.
            timeout_error = autoupdater.ChromiumOSError(
                'Update failed. Timed out waiting for system to mark'
                ' new kernel as successful.')
            utils.poll_for_condition(
                _kernel_marked_successful,
                exception=timeout_error,
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)

    # Clean up any old autotest directories which may be lying around.
    stale_paths = global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list)
    for path in stale_paths:
        self.run('rm -rf ' + path)
300
301
def _get_board_from_afe(self):
    """Retrieve this host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern.

    @returns board from label, or `None`.
    """
    host_model = models.Host.objects.get(hostname=self.hostname)
    # Build a real list (not a `filter` object) so that `len()` and
    # indexing work on both Python 2 and Python 3.
    board_labels = [label for label in host_model.labels.all()
                    if label.name.startswith('board:')]
    if len(board_labels) == 1:
        return board_labels[0].name.split(':', 1)[1]
    if not board_labels:
        logging.error('Host %s does not have a board label.',
                      self.hostname)
    else:
        logging.error('Host %s has multiple board labels.',
                      self.hostname)
    return None
324
325
def _servo_repair(self, board):
    """Attempt to repair this host using an attached Servo.

    Re-install the OS on the DUT by 1) installing a test image
    on a USB storage device attached to the Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image.

    @param board Board name used to select the devserver and the
            test image to install.

    @exception AutoservError The DUT did not come up after booting
            from USB, or after rebooting the installed image.
    """
    server = dev_server.ImageServer.devserver_url_for_servo(board)
    image_path = self._DEFAULT_SERVO_URL_FORMAT % {'board': board}
    self.servo.install_recovery_image(server + image_path)
    usb_booted = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not usb_booted:
        raise error.AutoservError('DUT failed to boot from USB'
                                  ' after %d seconds' %
                                  self.USB_BOOT_TIMEOUT)
    self.run('chromeos-install --yes',
             timeout=self._INSTALL_TIMEOUT)
    # Power the DUT off, switch the USB mux setting, and power the
    # DUT back on so that it boots the freshly installed image.
    self.servo.power_long_press()
    self.servo.set('usb_mux_sel1', 'servo_sees_usbkey')
    self.servo.power_short_press()
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
352
353
Richard Barnette82c35912012-11-20 10:09:10 -0800354 def _powercycle_to_repair(self):
355 """Utilize the RPM Infrastructure to bring the host back up.
356
357 If the host is not up/repaired after the first powercycle we utilize
358 auto fallback to the last good install by powercycling and rebooting the
359 host 6 times.
360 """
361 logging.info('Attempting repair via RPM powercycle.')
362 failed_cycles = 0
363 self.power_cycle()
364 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
365 failed_cycles += 1
366 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
367 raise error.AutoservError('Powercycled host %s %d times; '
368 'device did not come back online.' %
369 (self.hostname, failed_cycles))
370 self.power_cycle()
371 if failed_cycles == 0:
372 logging.info('Powercycling was successful first time.')
373 else:
374 logging.info('Powercycling was successful after %d failures.',
375 failed_cycles)
376
377
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    Repair follows this sequence:
      1. If the DUT passes `self.verify()`, do nothing.
      2. If the DUT has a servo and its board is in the servo
         repair whitelist, re-install the DUT using the servo.
      3. Otherwise, if the DUT can be power-cycled via RPM and its
         board is an RPM recovery board, try to repair by
         power-cycling.

    As with the parent method, the last operation performed on
    the DUT must be to call `self.verify()`; if that call fails,
    the exception it raises is passed back to the caller.

    If the host's board can't be determined, or no repair method
    applies, the exception from the initial `self.verify()` is
    re-raised.
    """
    try:
        self.verify()
    # Only ordinary failures trigger a repair; KeyboardInterrupt
    # and SystemExit propagate immediately.
    except Exception:  # pylint: disable=W0703
        host_board = self._get_board_from_afe()
        if host_board is None:
            logging.error('host %s has no board; failing repair',
                          self.hostname)
            raise
        if (self.servo and
                host_board in self._SERVO_REPAIR_WHITELIST):
            self._servo_repair(host_board)
        elif (self.has_power() and
                host_board in self._RPM_RECOVERY_BOARDS):
            self._powercycle_to_repair()
        else:
            logging.error('host %s has no servo and no RPM control; '
                          'failing repair', self.hostname)
            raise
        self.verify()
414
415
def close(self):
    """Close the host, then disconnect all XMLRPC proxies.

    The superclass `close()` runs first; afterwards every XMLRPC
    connection still recorded for this host is disconnected.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
419
420
def cleanup(self):
    """Clean up the host by restarting the UI.

    Removes the "cleanup logs paused" marker file, then clears the
    login prompt state, restarts the UI, and waits for the login
    prompt.  If any of those client-side steps raises
    `AutotestRunError`, the superclass `cleanup()` is used instead.
    """
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_wait_for_login_prompt')
    except error.AutotestRunError:
        # `logging.warn` is a deprecated alias; use `logging.warning`.
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()
Chris Sosaf4d43ff2012-10-30 11:21:05 -0700435
436
Simran Basi154f5582012-10-23 16:27:11 -0700437 # TODO (sbasi) crosbug.com/35656
438 # Renamed the sitehost cleanup method so we don't go down this pathway.
439 # def cleanup(self):
def cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Runs the superclass `cleanup()`, and then, for hosts where
    `has_power()` is true, turns power back on.  A failure reported
    by the RPM infrastructure is logged instead of being raised.
    """
    super(SiteHost, self).cleanup()
    if not self.has_power():
        return
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700451
452
def reboot(self, **dargs):
    """
    This function reboots the site host. The more generic
    RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    @param dargs Keyword arguments for the superclass `reboot()`.
            `reboot_cmd` and `fastsync` are given Chrome OS
            specific defaults when the caller does not supply them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
468
469
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. update_engine answers a simple status request over DBus.

    The required disk space is read from the `gb_diskspace_required`
    setting in the `SERVER` config section, with a default of 20.
    """
    super(SiteHost, self).verify_software()
    required_gb = global_config.global_config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=int,
            default=20)
    self.check_diskspace('/mnt/stateful_partition', required_gb)
    self.run('update_engine_client --status')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700487
488
def xmlrpc_connect(self, command, port, command_name=None):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    If starting the server on the host fails, the ssh tunnel
    created for the connection is terminated before the exception
    is passed on to the caller.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @return An `xmlrpclib.ServerProxy` that talks to the server
        through the local end of the ssh tunnel.
    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except BaseException:
        # The tunnel is not yet recorded in the proxy map, so
        # nothing else would ever clean it up; terminate it here
        # and pass the original exception along.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
        raise
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    return xmlrpclib.ServerProxy(rpc_url, allow_none=True)
539
540
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned XMLRPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    if port not in self._xmlrpc_proxy_map:
        return
    remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]
    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    tunnel_running = tunnel_proc.poll() is None
    if tunnel_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
581
582
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports."""
    # Iterate over a snapshot of the ports: xmlrpc_disconnect()
    # deletes entries from the map, and changing a dictionary's
    # size while iterating over it directly is an error.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
587
588
def _ping_is_up(self):
    """Ping the host once, and return whether it responded.

    @return `True` if `utils.ping()` reported status 0 for this
            host, `False` otherwise.
    """
    ping_status = utils.ping(self.hostname, tries=1, deadline=1)
    return ping_status == 0
592
593
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return `True` if the host stopped answering `ping`, `False`
            if it was still answering after the timeout.
    """

    # This function is a slightly faster version of wait_down().
    #
    # In AbstractSSHHost.wait_down(), `ssh` is used to determine
    # whether the host is down.  In some situations (mine, at
    # least), `ssh` can take over a minute to determine that the
    # host is down.  The `ping` command answers the question
    # faster, so we use that here instead.
    #
    # There is no equivalent for wait_up(), because a target that
    # answers to `ping` won't necessarily respond to `ssh`.
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if not self._ping_is_up():
            return True

    # If the timeout is short relative to the run time of
    # _ping_is_up(), we might be prone to false failures for
    # lack of checking frequently enough.  To be safe, we make
    # one last check _after_ the deadline.
    return not self._ping_is_up()
621
622
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700649
650
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode.  A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                           target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)
    # An unchanged boot id means the host resumed without rebooting.
    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        return
    raise error.TestFail(
        'client rebooted, but sleep was expected'
        ' (old boot %s, new boot %s)'
            % (old_boot_id, new_boot_id))
681
682
def test_wait_for_shutdown(self):
    """Block until the client stops answering ping, as when shut down.

    Ping silence alone cannot tell a powered-off unit from one that
    is merely asleep.  To confirm that the unit was really down,
    force a boot and then call `test_wait_for_boot()`.

    A test is expected to use this function in a sequence such as:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within
                        `SHUTDOWN_TIMEOUT` seconds.
    """
    limit = self.SHUTDOWN_TIMEOUT
    went_down = self.ping_wait_down(timeout=limit)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' % limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700708
709
def test_wait_for_boot(self, old_boot_id=None):
    """Block until the client has booted from cold power.

    Pass as `old_boot_id` the value `get_boot_id()` returned before
    the shut down.  A real boot changes the boot id, so an unchanged
    boot id is reported as a `TestFail`.  When `old_boot_id` is
    omitted (or otherwise false), the boot id comparison is skipped.

    See @ref test_wait_for_shutdown for the calling sequence this
    function belongs to.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within
                        `REBOOT_TIMEOUT` seconds.
    @exception TestFail The host responded, but its boot id still
                        equals `old_boot_id`, indicating that there
                        was no reboot.
    """
    limit = self.REBOOT_TIMEOUT
    if not self.wait_up(timeout=limit):
        raise error.TestFail(
                'client failed to reboot after %d seconds' % limit)

    # Without a reference boot id there is nothing to compare against.
    if not old_boot_id:
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -0700739
740
@staticmethod
def check_for_rpm_support(hostname):
    """Match a hostname against the lab's RPM naming format.

    @param hostname Name of the host to check.

    @return A regular expression MatchObject if `hostname` follows the
            naming format defined for RPM powered DUTs in the lab, or
            None if it does not follow that format.
    """
    rpm_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -0700750
751
def has_power(self):
    """Report whether this host's name marks it as RPM powered.

    The result is passed through unchanged from
    `check_for_rpm_support()`, so it is a truthy or None value rather
    than a strict boolean.

    @return A regular expression MatchObject if this host's hostname
            follows the naming format defined for RPM powered DUTs in
            the lab, or None if it does not.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
759
760
def power_off(self):
    """Request, via the RPM client, that this host's power go OFF."""
    target = self.hostname
    rpm_client.set_power(target, 'OFF')
Simran Basid5e5e272012-09-24 15:23:59 -0700764
765
def power_on(self):
    """Request, via the RPM client, that this host's power go ON."""
    target = self.hostname
    rpm_client.set_power(target, 'ON')
Simran Basid5e5e272012-09-24 15:23:59 -0700769
770
def power_cycle(self):
    """Request, via the RPM client, a power CYCLE (OFF, then ON)."""
    target = self.hostname
    rpm_client.set_power(target, 'CYCLE')
Simran Basic6f1f7a2012-10-16 10:47:46 -0700774
775
def get_platform(self):
    """Derive this host's platform label from its firmware id.

    @returns a string representing this host's platform.
    """
    cros_info = utils.Crossystem(self)
    cros_info.init()
    # fwid generally follows the format {platform}.{firmware version},
    # for example Alex.X.YYY.Z or Google_Alex.X.YYY.Z, so the text
    # before the first '.' is the platform id.
    firmware_id = cros_info.fwid()
    platform_id = firmware_id.split('.')[0]
    # Newer platforms start with 'Google_' while the older ones do not;
    # drop that marker after lower-casing so both forms agree.
    return platform_id.lower().replace('google_', '')
789
790
@add_function_to_list(_LABEL_FUNCTIONS)
def get_board(self):
    """Build the board label for this host from /etc/lsb-release.

    @returns a string of the form 'board:<name>' for this host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    full_name = lsb_release['CHROMEOS_RELEASE_BOARD']
    # Devices in the lab generally report the plain board name, but
    # development devices report {board_name}-signed-{key_type}.  Board
    # names may also carry an 'x86-' prefix that has to be kept, so for
    # those the first two dash-separated pieces form the name.
    pieces_to_keep = 2 if 'x86' in full_name else 1
    name_pieces = full_name.split('-')[:pieces_to_keep]
    return 'board:%s' % '-'.join(name_pieces)
806
807
@add_function_to_list(_LABEL_FUNCTIONS)
def has_lightsensor(self):
    """Determine the lightsensor label for this host.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_root = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_root, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop is possible; the command still executes
        # correctly, it just prints a few messages to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    else:
        return 'lightsensor'
827
828
@add_function_to_list(_LABEL_FUNCTIONS)
def has_bluetooth(self):
    """Determine the bluetooth label for this host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    probe_cmd = 'test -d /sys/class/bluetooth/hci0'
    try:
        self.run(probe_cmd)
    except error.AutoservRunError:
        # test exited with a return code of 1, meaning the directory
        # did not exist.
        return None
    else:
        # test exited with a return code of 0.
        return 'bluetooth'
844
845
def get_labels(self):
    """Collect every automatic label that applies to this host.

    This is the main way to retrieve all the automatic labels for a
    host: each function in `_LABEL_FUNCTIONS` is called once, in order,
    with this host as its argument, and results that are false (such as
    None) are left out.

    @returns a list of the labels produced by the label functions.
    """
    candidates = [make_label(self) for make_label in self._LABEL_FUNCTIONS]
    return [found for found in candidates if found]