blob: 8833b4e39c039d5989c400c20ace2473d9bbff8f [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
beeps687243d2013-07-18 15:29:27 -07006import getpass
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
beeps687243d2013-07-18 15:29:27 -070011import smtplib
Christopher Wileyd78249a2013-03-01 13:05:31 -080012import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070015import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070016
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080018from autotest_lib.client.common_lib import error
19from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070020from autotest_lib.client.common_lib import mail
21from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070022from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080023from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080024from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080025from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070026from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070027from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050029from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070030from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080031from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070032from autotest_lib.server.hosts import remote
beeps687243d2013-07-18 15:29:27 -070033from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080034from autotest_lib.site_utils.rpm_control_system import rpm_client
beepsdae65fd2013-07-26 16:24:41 -070035from autotest_lib.tko import utils as tko_utils
Simran Basid5e5e272012-09-24 15:23:59 -070036
37
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080038def _make_servo_hostname(hostname):
39 host_parts = hostname.split('.')
40 host_parts[0] = host_parts[0] + '-servo'
41 return '.'.join(host_parts)
42
43
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None`
            when the servo host is not in the lab zone or the
            Servo object could not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Only `Exception` is caught so that KeyboardInterrupt and
        # SystemExit still propagate, and the failure is logged so
        # that a missing servo can be diagnosed afterwards.
        logging.warning('Unable to create Servo for %s; continuing '
                        'without servo.', servo_host, exc_info=True)
        return None
66
67
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to reach Chrome OS devices.

    This replaces the default make_ssh_command with a fixed set of
    options tuned for Chrome OS:
      - ConnectTimeout=30; at most 30 seconds are allowed for an SSH
        connection failure.  Consistent with remote_access.sh.

      - ServerAliveInterval=180; SSH pings the connection every 180
        seconds.  Together with ServerAliveCountMax this makes
        Autotest bail out quickly if the connection dies.  60 seconds
        was tried first, but led to frequent job ABORTS where the
        test had completed successfully.

      - ServerAliveCountMax=3; consistent with remote_access.sh.

      - ConnectionAttempts=4; reduces flakiness in connection errors;
        consistent with remote_access.sh.

      - UserKnownHostsFile=/dev/null; host keys change with every new
        installation, so there is no point in saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return the ssh command line as a single string.
    """
    fixed_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ' '.join('-o ' + option for option in fixed_options)
    return '/usr/bin/ssh -a -x %s %s -l %s -p %d' % (
            opts, option_flags, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700104
105
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800106
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label-detecting functions.

    The returned decorator appends the decorated function to
    `label_function_list` and hands the function back unchanged.  When
    a non-empty `label` is given and `label_list` is not None, `label`
    is appended to `label_list` as well.

    @param label_function_list: List of label detecting functions to add
                                the decorated function to.
    @param label_list: List of detectable labels to add `label` to.
                       (Default: None)
    @param label: Label string that is detectable by the decorated
                  function.  (Default: None)
    @return the decorator described above.
    """
    def _register(detector):
        """
        @param detector: The function to be added as a detector.
        @return `detector`, unmodified.
        """
        label_function_list.append(detector)
        if label_list is None or not label:
            return detector
        label_list.append(label)
        return detector
    return _register
125
126
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700127class SiteHost(remote.RemoteHost):
128 """Chromium OS specific subclass of Host."""
129
130 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -0500131 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700132
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800133 # Time to wait for new kernel to be marked successful after
134 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700135 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700136
Richard Barnette03a0c132012-11-05 12:40:35 -0800137 # Timeout values (in seconds) associated with various Chrome OS
138 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700139 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800140 # In general, a good rule of thumb is that the timeout can be up
141 # to twice the typical measured value on the slowest platform.
142 # The times here have not necessarily been empirically tested to
143 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700144 #
145 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800146 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
147 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700148 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800149 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800150 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700151 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800152 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800153 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800154 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700155 # REBOOT_TIMEOUT: How long to wait for a reboot.
Richard Barnette03a0c132012-11-05 12:40:35 -0800156 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700157
158 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800159 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700160 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700161 USB_BOOT_TIMEOUT = 150
Chris Sosab76e0ee2013-05-22 16:55:41 -0700162
163 # We have a long timeout to ensure we don't flakily fail due to other
164 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
165 REBOOT_TIMEOUT = 300
166
Richard Barnette03a0c132012-11-05 12:40:35 -0800167 _INSTALL_TIMEOUT = 240
168
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800169 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
170 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
171 _USB_POWER_TIMEOUT = 5
172 _POWER_CYCLE_TIMEOUT = 10
173
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800174
Richard Barnette82c35912012-11-20 10:09:10 -0800175 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
176 'rpm_recovery_boards', type=str).split(',')
177
178 _MAX_POWER_CYCLE_ATTEMPTS = 6
179 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
180 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
181 'host[0-9]+')
182 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
183 'in_illuminance0_raw',
184 'illuminance0_input']
185 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
186 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800187 _DETECTABLE_LABELS = []
188 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
189 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700190
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800191 # Constants used in ping_wait_up() and ping_wait_down().
192 #
193 # _PING_WAIT_COUNT is the approximate number of polling
194 # cycles to use when waiting for a host state change.
195 #
196 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
197 # for arguments to the internal _ping_wait_for_status()
198 # method.
199 _PING_WAIT_COUNT = 40
200 _PING_STATUS_DOWN = False
201 _PING_STATUS_UP = True
202
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800203 # Allowed values for the power_method argument.
204
205 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
206 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
207 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
208 POWER_CONTROL_RPM = 'RPM'
209 POWER_CONTROL_SERVO = 'servoj10'
210 POWER_CONTROL_MANUAL = 'manual'
211
212 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
213 POWER_CONTROL_SERVO,
214 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800215
Simran Basi5e6339a2013-03-21 11:34:32 -0700216 _RPM_OUTLET_CHANGED = 'outlet_changed'
217
beeps687243d2013-07-18 15:29:27 -0700218 # pylint: disable=E1120
219 _NOTIFY_ADDRESS = global_config.global_config.get_config_value(
beepsdae65fd2013-07-26 16:24:41 -0700220 'SCHEDULER', 'notify_email_errors', default='')
beeps687243d2013-07-18 15:29:27 -0700221
222 _SENDER_ADDRESS = global_config.global_config.get_config_value(
223 'SCHEDULER', "notify_email_from", default=getpass.getuser())
224
225 _ERROR_EMAIL_SUBJECT_FORMAT = 'job_repo_url changed for host %s'
226 _ERROR_EMAIL_MSG_FORMAT = ('While verifying the job_repo_url on %(host)s '
227 'the devserver changed from %(old_devserver)s '
228 'to %(new_devserver)s. This might indicate a '
beepsdae65fd2013-07-26 16:24:41 -0700229 'delay in job with id: %(job_id)s, re-staging '
230 'artifacts took an additional %(stage_time)s '
231 'seconds.')
beeps687243d2013-07-18 15:29:27 -0700232
@staticmethod
def get_servo_arguments(args_dict):
    """Pick the servo-related options out of `args_dict`.

    Returns the subset of `args_dict` holding the standard arguments
    used to construct a servo object for a host, namely the keys
    'servo_host' and 'servo_port'.  Keys missing from `args_dict`
    are simply left out of the result.  The intent is to provide
    standard argument processing from run_remote_tests for tests
    that require a servo to operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
                     arguments.
    @return a new dictionary with only the servo arguments.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700258
J. Richard Barnette964fba02012-10-24 17:34:29 -0700259
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    `self.servo` is chosen as follows:
      1. If `_get_lab_servo()` returns a servo for `hostname`, that
         servo is used regardless of `servo_args`.
      2. Otherwise, if `servo_args` is not None (e.g. a control file
         passed settings for `servo_host`, `servo_port`, or both),
         a Servo is constructed from those arguments.
      3. Otherwise `self.servo` is `None`.

    @param hostname Name of the host; forwarded to the superclass.
    @param servo_args Keyword arguments for `servo.Servo()`, or None.
    @param args Extra positional arguments for the superclass.
    @param dargs Extra keyword arguments for the superclass.
    """
    super(SiteHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._xmlrpc_proxy_map = {}

    self.servo = _get_lab_servo(hostname)
    if self.servo:
        return
    if servo_args is not None:
        self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700283
284
def get_repair_image_name(self):
    """Generate the repair image name from the global config.

    The `stable_build_pattern` setting from the CROS section of the
    global config is filled in with the board returned by
    `_get_board_from_afe()` and the `stable_cros_version` setting,
    in that order.

    @returns the formatted build pattern as a string.
    @raises error.AutoservError if no board is found for this host.
    """
    config = global_config.global_config
    stable_version = config.get_config_value('CROS', 'stable_cros_version')
    build_pattern = config.get_config_value('CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError(
            'DUT has no board attribute, cannot be repaired.')
300
301
def _host_in_AFE(self):
    """Look this host up in the AFE by its hostname.

    @returns the result of the AFE `get_hosts()` query for this
             hostname; callers in this class only test it for
             truthiness.
    """
    afe = self._AFE
    return afe.get_hosts(hostname=self.hostname)
308
309
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    The AFE is queried once for this host.  Nothing is raised when
    the host or the attribute is missing; `None` is returned instead.

    @returns the job_repo_url host attribute from the AFE, or None if
             the AFE does not know the host or the host has no such
             attribute.
    """
    # A single query serves both as the "is the host known?" check and
    # as the source of the attributes, avoiding a second round trip
    # for the same data.
    hosts = self._AFE.get_hosts(hostname=self.hostname)
    if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
        return hosts[0].attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700323
324
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE lookup for this host comes back empty.
    Otherwise the host is removed from every label whose name starts
    with the version prefix, and the job_repo_url attribute is reset
    through `update_job_repo_url(None, None)`.
    """
    if self._host_in_AFE():
        this_host = [self.hostname]
        version_labels = self._AFE.get_labels(
                name__startswith=ds_constants.VERSION_PREFIX,
                host__hostname=self.hostname)
        for version_label in version_labels:
            version_label.remove_hosts(hosts=this_host)

        self.update_job_repo_url(None, None)
339
340
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    When either argument is empty the attribute is set to None;
    otherwise it is set to the package url built from both arguments.
    The attribute is then read back and compared with what was
    written.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    if self.lookup_job_repo_url() == repo_url:
        return
    raise error.AutoservError(
            'Failed to update job_repo_url with %s, host %s'
            % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500358
359
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE lookup for this host comes back empty.
    Otherwise the version label for `image_name` is fetched from the
    AFE (and created if it does not exist yet), this host is added to
    it, and the job_repo_url attribute is pointed at the devserver
    resolved for `image_name`.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0
    """
    if not self._host_in_AFE():
        return

    version_label_name = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=version_label_name)
    version_label = (existing[0] if existing
                     else self._AFE.create_label(name=version_label_name))

    version_label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
381
382
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0.  The autotest
    artifacts for that build are staged on the devserver resolved for
    the build, and the staging time is reported as a stats gauge.  If
    the resolved devserver differs from the one embedded in the url, a
    notification email is sent and the job_repo_url of the host is
    updated to point at the resolved devserver.

    Nothing is done (beyond a warning) when the host has no
    job_repo_url.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Only used to report a job id in the notification email.

    @raises DevServerException: If we could not resolve a devserver.
    @raises AutoservError: If we're unable to save the new job_repo_url as
        a result of choosing a new devserver.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
            job_repo_url)

    ds = dev_server.ImageServer.resolve(image_name)
    new_devserver_url = ds.url()

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    staging_began = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - staging_began

    # Record how much of the verification time comes from a devserver
    # restage.  If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = site_utils.ParseBuildName(
                image_name)[:3]
    except site_utils.ParseBuildNameException:
        # Unparseable build names just go without a stats entry.
        pass
    else:
        # Cut the host out of the url: everything after the '//' and
        # before the last ':'.
        host_start = new_devserver_url.find('/') + 2
        host_end = new_devserver_url.rfind(':')
        devserver_host = new_devserver_url[host_start:host_end]
        gauge_key = '%s.%s.%s.%s' % (board, build_type, branch,
                                     devserver_host.replace('.', '_'))
        stats.Gauge('verify_job_repo_url').send(gauge_key, stage_time)

    if ds.url() == devserver_url:
        return

    # Since this is only to add traceability to devserver flakes we
    # make a best effort attempt at getting the job id by parsing the
    # tag.
    try:
        job_id = tko_utils.get_afe_job_id(tag)
    except ValueError:
        logging.debug('Could not determine job id from tag %s', tag)
        job_id = 'NA'

    error_dict = {
        'host': self.hostname,
        'old_devserver': devserver_url,
        'new_devserver': new_devserver_url,
        'job_id': job_id,
        'stage_time': stage_time,
    }
    subject = self._ERROR_EMAIL_SUBJECT_FORMAT % self.hostname
    message = self._ERROR_EMAIL_MSG_FORMAT % error_dict
    try:
        mail.send(self._SENDER_ADDRESS, self._NOTIFY_ADDRESS, '',
                  subject, message)
    except smtplib.SMTPDataError:
        # Fall back to the log when the mail cannot be delivered.
        logging.warning(self._ERROR_EMAIL_MSG_FORMAT, error_dict)

    self.update_job_repo_url(ds.url(), image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400470
471
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage.  It is implemented
    by calling the updater's run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    @param update_url: url of the image.  Not used by this method.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False

    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    marker_name = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, marker_name)
                    for folder in ('/var', '/home',
                                   '/mnt/stateful_partition')]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    # NOTE(review): the command ends with `echo $?`, so the shell's own
    # exit status is that of `echo`, not of `test -f`; the result of the
    # file test is only printed to stdout.  Confirm whether checking
    # exit_status here is what was intended.
    for marker_path in marker_paths:
        result = self.run('test -f %s; echo $?' % marker_path,
                          ignore_status=True)
        if result.exit_status == 1:
            return False
    return True
516
517
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    The checks run in this order: rollback to a different kernel than
    expected, installed build version, and finally that the active
    kernel gets marked good within `_KERNEL_UPDATE_TIMEOUT` seconds.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.
    @raises autoupdater.ChromiumOSError if any of the checks fails.

    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # Figure out the newly active kernel.
    active_kernel, _ = updater.get_kernel_state()

    # Check for rollback due to a bad build.
    rolled_back = expected_kernel and active_kernel != expected_kernel
    if rolled_back:
        # Print out some information to make it easier to debug
        # the rollback.
        logging.debug('Dumping partition table.')
        self.run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self.run('crossystem --all')
        raise autoupdater.ChromiumOSError(
                'Build %s failed to boot on %s; system rolled back '
                'to previous build' % (updater.update_version,
                                       self.hostname))

    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        raise autoupdater.ChromiumOSError(
                'Failed to update %s to build %s; found build '
                '%s instead' % (self.hostname,
                                updater.update_version,
                                updater.get_build_id()))

    def _kernel_marked_good():
        """Tell whether the active kernel has no tries left and is
        flagged successful."""
        return (updater.get_kernel_tries(active_kernel) == 0
                and updater.get_kernel_success(active_kernel))

    # Make sure chromeos-setgoodkernel runs.
    try:
        utils.poll_for_condition(
                _kernel_marked_good,
                exception=autoupdater.ChromiumOSError(),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except autoupdater.ChromiumOSError:
        # Use the state of system-services to pick the more specific
        # failure description.
        services_status = self.run('status system-services').stdout
        if services_status == 'system-services start/running\n':
            event = ('update-engine failed to call '
                     'chromeos-setgoodkernel')
        else:
            event = 'Chrome failed to reach login screen'
        raise autoupdater.ChromiumOSError(
                'After update and reboot, %s '
                'within %d seconds' % (event,
                                       self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800575
576
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    A devserver is resolved for the build and asked to download it
    (with `synchronous=False`).

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       empty, the repair image name is used instead.
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)
    devserver = dev_server.ImageServer.resolve(build)
    devserver.trigger_download(build, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (devserver.url(), build)
590
591
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    A devserver is resolved for the build, the 'test_image' artifact
    is staged on it, and the devserver's test image URL for the build
    is returned.

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       empty, the repair image name is used instead.
    @returns the URL reported by the devserver's
             `get_test_image_url()` for the build.
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)
    devserver = dev_server.ImageServer.resolve(build)
    devserver.stage_artifacts(build, ['test_image'])
    return devserver.get_test_image_url(build)
605
606
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).

    The update URL is resolved in this order:
      1. the `update_url` argument, when given;
      2. the image requested via `self._parser.options.image` (used
         verbatim if it starts with 'http://', otherwise staged with
         `self._stage_image_for_update()`);
      3. in repair mode, whatever `self._stage_image_for_update()`
         stages when called without a build name.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None and repair is True we will install the
            stable image listed in global_config under
            CROS.stable_cros_version.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @raises autoupdater.ChromiumOSError if no update URL can be
            determined, or if after a full update the inactive kernel
            has lower priority than the active one.

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            # No build requested: stage the default build for repair.
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        # Best effort only: any failure here falls through to the full
        # update below because `updated` is still False.
        logging.exception(e)
        logging.warn('Failed to stateful update DUT, force to update.')

    # Stays None unless the full-update path below determines it.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('touch /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Dump the update log via run() before rebooting.
            update_engine_log = '/var/log/update_engine.log'
            logging.info('Dumping %s', update_engine_log)
            self.run('cat %s' % update_engine_log)
            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        # Re-add the labels/attribute cleared above, now for the
        # image named by the update URL.
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    # Clean up any old autotest directories which may be lying around.
    for path in global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list):
        self.run('rm -rf ' + path)
728
729
def _get_board_from_afe(self):
    """Look up this host's board via its AFE labels.

    Delegates to `server_utils.get_board_from_afe()`.  Per the
    original contract, the board is taken from a host label of the
    form "board:<board>", and `None` is returned if there is not a
    single, unique label matching the pattern.

    @returns board from label, or `None`.
    """
    host_name, afe = self.hostname, self._AFE
    return server_utils.get_board_from_afe(host_name, afe)
Simran Basi833814b2013-01-29 13:13:43 -0800740
741
def get_build(self):
    """Look up the current build for this Host via the AFE.

    Delegates to `server_utils.get_build_from_afe()`, which examines
    this host's labels in the AFE to determine its build.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    host_name, afe = self.hostname, self._AFE
    return server_utils.get_build_from_afe(host_name, afe)
Richard Barnette82c35912012-11-20 10:09:10 -0800751
752
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    If the host is up, try installing the DUT with a stable
    "repair" version of Chrome OS as defined in the global_config
    under CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    reachable = self.is_up()
    if not reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the failure, then let the caller see the original
        # exception.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500774
775
def servo_install(self, image_url=None):
    """Re-install the OS on the DUT using the attached Servo.

    The steps are:
    1) installing a test image on a USB storage device attached to the
       Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.

    @raises AutoservRepairFailure if the DUT does not come up from
            USB within USB_BOOT_TIMEOUT seconds.
    @raises AutoservError if the installed image fails to boot within
            BOOT_TIMEOUT seconds.
    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()
    booted_installed = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
805
806
def _servo_repair_reinstall(self):
    """Reinstall the DUT utilizing servo and a test image.

    Stages an image with `stage_image_for_servo()` and hands the
    resulting URL to `servo_install()`, which re-installs the OS by:
    1) installing a test image on a USB storage device attached to the
       Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    self.servo_install(self.stage_image_for_servo())
829
830
def _servo_repair_power(self):
    """Attempt to repair DUT using an attached Servo.

    Powers the DUT off and back on through the servo's power state
    controller, then waits up to BOOT_TIMEOUT seconds for it to come
    up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT is not up after the power
            cycle.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    # The controller is fetched anew for each step, as before.
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()
    came_up = self.wait_up(self.BOOT_TIMEOUT)
    if not came_up:
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800852
853
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power-cycled once; each time it then fails to come up
    within BOOT_TIMEOUT seconds it is power-cycled again, until
    _MAX_POWER_CYCLE_ATTEMPTS boot failures have been seen.  The
    repeated cycling is meant to trigger auto fallback to the last
    good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host never comes back online.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    boot_failures = 0
    self.power_cycle()
    while True:
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        boot_failures += 1
        if boot_failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, boot_failures))
        self.power_cycle()

    if boot_failures:
        logging.info('Powercycling was successful after %d failures.',
                     boot_failures)
    else:
        logging.info('Powercycling was successful first time.')
884
885
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted in order, stopping at the
    first one after which `self.verify()` passes:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If there's a servo for the DUT, try to re-install via
         the servo.
      4. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Any exception raised by a repair procedure, or by the
    `self.verify()` call that follows it, is logged and recorded, and
    the next procedure is tried.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT; the message lists every recorded failure.
    """
    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._install_repair, self._servo_repair_power,
                    self._servo_repair_reinstall,
                    self._powercycle_to_repair]
    errors = []
    for repair_func in repair_funcs:
        try:
            repair_func()
            self.verify()
            return
        except Exception as e:
            # logging.warn() is a deprecated alias; use warning().
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -0800931
932
def close(self):
    """Close this host, then disconnect every XMLRPC proxy.

    Runs the parent class `close()` first and afterwards calls
    `self.xmlrpc_disconnect_all()`.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
936
937
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE lists this host with the
    _RPM_OUTLET_CHANGED attribute.  Otherwise the host is powered on
    and the attribute is reset to None; a RemotePowerException from
    power_on() is logged rather than raised.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matches = afe.get_hosts(hostname=self.hostname)
    if not matches:
        return
    if self._RPM_OUTLET_CHANGED not in matches[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
956
957
def cleanup(self):
    """Clean up the host by restarting the UI, rebooting on failure.

    Removes the file named by constants.CLEANUP_LOGS_PAUSED_FILE, then
    clears the login prompt state, restarts the ui job and waits for
    the login prompt.  If any of those steps raises AutotestRunError
    or AutoservRunError, the parent class cleanup() is used instead.
    Finally, hosts for which `has_power()` is true get
    `_cleanup_poweron()` called.
    """
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        # logging.warn() is a deprecated alias; use warning().
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()
    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700975
976
def reboot(self, **dargs):
    """Reboot the site host with Chrome OS specific defaults.

    The more generic RemoteHost.reboot() performs sync and sleeps for
    5 seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    @param dargs: keyword arguments forwarded to the parent class
            reboot().  'reboot_cmd' and 'fastsync' are filled in with
            defaults only when the caller did not supply them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
992
993
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. update_engine answers a simple status request over DBus.
     5. python on the DUT can import cPickle.

    The space thresholds are read from the SERVER section of the
    global config (`gb_diskspace_required`, default 20.0, and
    `gb_encrypted_diskspace_required`, default 0.1).

    """
    super(SiteHost, self).verify_software()
    config = global_config.global_config

    stateful_gb = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float,
            default=20.0)
    self.check_diskspace('/mnt/stateful_partition', stateful_gb)

    encrypted_gb = config.get_config_value(
            'SERVER', 'gb_encrypted_diskspace_required', type=float,
            default=0.1)
    self.check_diskspace('/mnt/stateful_partition/encrypted', encrypted_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001021
1022
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready.  If the readiness check still fails once the
    retry budget of timeout_seconds is used up, the connection is
    torn down with `xmlrpc_disconnect()` and the exception from the
    check propagates to the caller.

    Any previous connection for `port` is disconnected first.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'

    @returns an `xmlrpclib.ServerProxy` that reaches the server
        through a local ssh tunnel.

    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except Exception:
        # The tunnel is not yet recorded in _xmlrpc_proxy_map, so
        # xmlrpc_disconnect() could never find it.  Tear it down
        # here instead of leaking the ssh process.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
        raise
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        finally:
            # No except clause: the original exception still reaches
            # the caller after the cleanup below.
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.xmlrpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001111
1112 def xmlrpc_disconnect(self, port):
1113 """Disconnect from an XMLRPC server on the host.
1114
1115 Terminates the remote XMLRPC server previously started for
1116 the given `port`. Also closes the local ssh tunnel created
1117 for the connection to the host. This function does not
1118 directly alter the state of a previously returned XMLRPC
1119 client object; however disconnection will cause all
1120 subsequent calls to methods on the object to fail.
1121
1122 This function does nothing if requested to disconnect a port
1123 that was not previously connected via `self.xmlrpc_connect()`
1124
1125 @param port Port number passed to a previous call to
1126 `xmlrpc_connect()`
1127 """
1128 if port not in self._xmlrpc_proxy_map:
1129 return
1130 entry = self._xmlrpc_proxy_map[port]
1131 remote_name = entry[0]
1132 tunnel_proc = entry[1]
1133 if remote_name:
1134 # We use 'pkill' to find our target process rather than
1135 # a PID, because the host may have rebooted since
1136 # connecting, and we don't want to kill an innocent
1137 # process with the same PID.
1138 #
1139 # 'pkill' helpfully exits with status 1 if no target
1140 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -07001141 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001142 # status.
1143 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
1144
1145 if tunnel_proc.poll() is None:
1146 tunnel_proc.terminate()
1147 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
1148 else:
1149 logging.debug('Tunnel pid %d terminated early, status %d',
1150 tunnel_proc.pid, tunnel_proc.returncode)
1151 del self._xmlrpc_proxy_map[port]
1152
1153
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports."""
    # xmlrpc_disconnect() deletes entries from the map, so iterate
    # over an explicit snapshot of the keys rather than relying on
    # keys() returning a copy.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
1158
1159
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    A ping result of 0 from `utils.ping()` is taken to mean the host
    answered.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    answered = utils.ping(self.hostname, tries=1, deadline=1) == 0
    return status == answered
1170
def _ping_wait_for_status(self, status, timeout):
    """Wait for the host to have a given status (UP or DOWN).

    Status is checked by polling.  Polling will not last longer
    than the number of seconds in `timeout`.  The polling
    interval will be long enough that only approximately
    _PING_WAIT_COUNT polling cycles will be executed, subject
    to a maximum interval of about one minute.

    @param status Waiting will stop immediately if `ping` of the
                  host returns this status.
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    deadline = time.time() + timeout
    # _ping_check_status() takes about 1 second, hence the
    # "- 1" in the formula below.
    pause = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
    while True:
        if time.time() > deadline:
            break
        if self._ping_check_status(status):
            return True
        if pause > 0:
            time.sleep(pause)

    # The last thing we did was sleep(pause), so it may
    # have been too long since the last `ping`.  Check one more
    # time, just to be sure.
    return self._ping_check_status(status)
1201
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001217
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001231
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The test for "is asleep" can't distinguish a system that is
    powered off; to confirm that the unit was asleep, it is
    necessary to force resume, and then call
    `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    stopped_answering = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if stopped_answering:
        return
    raise error.TestFail(
        'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001258
1259
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode.  A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                           target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
            'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host is back; an unchanged boot id means it really slept.
    new_boot_id = self.get_boot_id()
    if new_boot_id == old_boot_id:
        return
    raise error.TestFail(
        'client rebooted, but sleep was expected'
        ' (old boot %s, new boot %s)'
            % (old_boot_id, new_boot_id))
1290
1291
def test_wait_for_shutdown(self):
    """Wait until the client stops answering ping after a shutdown.

    A host that has shut down looks the same to this check as a host
    that is merely asleep.  To confirm the unit was really down, force
    a boot and then call test_wait_for_boot().

    Expected calling sequence from a test:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host was still answering ping once
                        `SHUTDOWN_TIMEOUT` seconds had elapsed.
    """
    shutdown_limit = self.SHUTDOWN_TIMEOUT
    if self.ping_wait_down(timeout=shutdown_limit):
        # Host went quiet in time; nothing more to verify here.
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            shutdown_limit)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001317
1318
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    When `old_boot_id` is supplied (and truthy), the boot id read
    after the host is back up must differ from it; an unchanged boot
    id is reported as a failure.  The boot id comparison is skipped
    when `old_boot_id` is not given.

    See @ref test_wait_for_shutdown for the expected calling sequence.

    @param old_boot_id A boot id value from `get_boot_id()`, obtained
                       before the shut down.

    @exception TestFail The host did not respond within
                        `REBOOT_TIMEOUT` seconds.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    reboot_limit = self.REBOOT_TIMEOUT
    if not self.wait_up(timeout=reboot_limit):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                reboot_limit)

    # Short-circuit keeps get_boot_id() from running when the caller
    # did not ask for the boot id comparison.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001348
1349
@staticmethod
def check_for_rpm_support(hostname):
    """Match a hostname against the naming format of RPM-powered DUTs.

    @param hostname The hostname to test.

    @return None if the hostname does not follow the naming format
            defined by `SiteHost._RPM_HOSTNAME_REGEX`.  If it does
            follow the format, the regular expression MatchObject is
            returned instead.
    """
    rpm_hostname_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001359
1360
def has_power(self):
    """Report whether this host's name follows the RPM naming format.

    @return The result of `SiteHost.check_for_rpm_support()` for this
            host's hostname: a (truthy) regular expression MatchObject
            when the name follows the defined format, otherwise None.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
1368
1369
def _set_power(self, state, power_method):
    """Set the power state of the host via RPM, Servo or manual action.

    @param state Power state to apply to the DUT; 'ON' or 'OFF',
                 compared case-insensitively.
    @param power_method Which method of power control to use.
                        `POWER_CONTROL_SERVO` and
                        `POWER_CONTROL_MANUAL` select those methods;
                        any other value is handled as RPM.

    @exception TestError `state` is not one of the accepted states.
    @exception TestFail RPM control was selected, but `has_power()`
                        reports no RPM for this host.
    """
    valid_states = ['ON', 'OFF']
    wanted_state = state.upper()
    if wanted_state not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        # Pause after toggling the servo power-enable control.
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # No automation here: the operator is told what to do and is
        # given a fixed window in which to do it.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Everything else is handled as RPM control.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    # Record on the AFE that this host's outlet state was changed
    # before asking the RPM to change it.
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, wanted_state)
Simran Basid5e5e272012-09-24 15:23:59 -07001400
1401
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual action.

    Thin wrapper that asks `_set_power()` for the 'OFF' state.

    @param power_method Which method of power control to use.
                        Defaults to `POWER_CONTROL_RPM`.  Valid values
                        are the strings "RPM", "manual", "servoj10".
    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001411
1412
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual action.

    Thin wrapper that asks `_set_power()` for the 'ON' state.

    @param power_method Which method of power control to use.
                        Defaults to `POWER_CONTROL_RPM`.  Valid values
                        are the strings "RPM", "manual", "servoj10".
    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1422
1423
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For Servo and manual control the cycle is done as an explicit
    power off, a pause of `_POWER_CYCLE_TIMEOUT` seconds, and a power
    on.  For any other method a single 'CYCLE' request is sent to the
    RPM client.

    @param power_method Which method of power control to use.
                        Defaults to `POWER_CONTROL_RPM`.  Valid values
                        are the strings "RPM", "manual", "servoj10".
    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        # NOTE(review): unlike _set_power(), this path neither checks
        # has_power() nor sets the _RPM_OUTLET_CHANGED host attribute
        # before talking to the RPM -- confirm that is intentional.
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001439
1440
def get_platform(self):
    """Determine the correct platform label for this host.

    The label is derived from the firmware id reported by crossystem:
    the text before the first '.', lower-cased, with any 'google_'
    removed.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # for example: Alex.X.YYY.Z or Google_Alex.X.YYY.Z
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0]
    lowered = platform_id.lower()
    # Newer platforms start with 'Google_' while the older ones do not.
    return lowered.replace('google_', '')
1454
1455
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    Reads CHROMEOS_RELEASE_BOARD from /etc/lsb-release on the host,
    trims it to the bare board name and prepends
    `ds_constants.BOARD_PREFIX`.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Devices in the lab generally have the correct board name but our
    # own development devices have {board_name}-signed-{key_type}.
    name_parts = board.split('-')
    if 'x86' in board:
        # The 'x86-' part of such names must be kept, so retain the
        # first two dash-separated pieces.
        trimmed_name = '-'.join(name_parts[0:2])
    else:
        trimmed_name = name_parts[0]
    return board_format_string % trimmed_name
Simran Basic6f1f7a2012-10-16 10:47:46 -07001472
1473
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Runs a `find ... | egrep ...` pipeline on the host, looking under
    `_LIGHTSENSOR_SEARCH_DIR` for any of the names listed in
    `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop is possible; the command still executes
        # correctly, with a few messages printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # The command failed; this is taken to mean egrep exited with
        # a return code of 1 because none of the possible lightsensor
        # files existed.
        return None
    return 'lightsensor'
1493
1494
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # The command failed; this is taken to mean `test` exited
        # with a return code of 1 because the directory did not exist.
        return None
    # `test` exited with a return code of 0.
    return 'bluetooth'
1510
1511
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host.  Every function in `_LABEL_FUNCTIONS` is called once with
    this host, in order, and each truthy result is kept.

    @returns a list of the labels produced, in the order of
             `_LABEL_FUNCTIONS`; falsy results (such as None) are
             left out.
    """
    # The inner generator makes sure each label function runs exactly
    # once, even though its result is both tested and collected.
    candidates = (detect(self) for detect in self._LABEL_FUNCTIONS)
    return [found for found in candidates if found]