blob: eb4fc3dd1763d7e6f32da0b352b21629af2b1f01 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
beeps687243d2013-07-18 15:29:27 -07006import getpass
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
beeps687243d2013-07-18 15:29:27 -070011import smtplib
Christopher Wileyd78249a2013-03-01 13:05:31 -080012import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070015import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070016
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080018from autotest_lib.client.common_lib import error
19from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070020from autotest_lib.client.common_lib import mail
21from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070022from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080023from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080024from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080025from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070026from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070027from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050029from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070030from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080031from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070032from autotest_lib.server.hosts import remote
beeps687243d2013-07-18 15:29:27 -070033from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080034from autotest_lib.site_utils.rpm_control_system import rpm_client
beepsdae65fd2013-07-26 16:24:41 -070035from autotest_lib.tko import utils as tko_utils
Simran Basid5e5e272012-09-24 15:23:59 -070036
37
def _make_servo_hostname(hostname):
    """Return the name of the servo host that goes with `hostname`.

    The servo host name is the DUT host name with '-servo' appended to
    its first dot-separated component; everything after the first dot
    is carried over unchanged.

    @param hostname: host name of the DUT.
    @return the corresponding servo host name.
    """
    short_name, separator, domain = hostname.partition('.')
    return short_name + '-servo' + separator + domain
42
43
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None` if
            the servo host is not in the lab zone or the Servo could
            not be created.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception as e:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Only `Exception` is caught, so that KeyboardInterrupt and
        # SystemExit still propagate, and the failure is logged
        # rather than dropped without a trace.
        logging.debug('Ignoring failure to set up lab servo %s: %s',
                      servo_host, e)
        return None
66
# Extra options inserted verbatim into every command line built by
# make_ssh_command(), right after the fixed '-a -x' flags.
GLOBAL_SSH_COMMAND_OPTIONS = ''


def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line used to reach a Chrome OS host.

    This overrides the default make_ssh_command with a fixed set of
    options tuned for Chrome OS:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null; host
        keys change with every new installation, so they are neither
        checked nor saved.
      - BatchMode=yes.
      - ConnectTimeout=30; at most 30 seconds for a connection attempt,
        for consistency with remote_access.sh.
      - ServerAliveInterval=180 and ServerAliveCountMax=3; ssh pings the
        connection every 180 seconds so that a dead connection makes
        Autotest bail out.  An interval of 60 seconds was tried first,
        but caused frequent job ABORTS for tests that had completed
        successfully.
      - ConnectionAttempts=4; reduces flakiness from connection errors,
        for consistency with remote_access.sh.
      - Protocol=2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return the ssh command line, as a single string.
    """
    ssh_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_flags = ''.join(' -o %s' % option for option in ssh_options)
    command_format = ('/usr/bin/ssh -a -x %s %s' + option_flags +
                      ' -l %s -p %d')
    return command_format % (GLOBAL_SSH_COMMAND_OPTIONS, opts, user, port)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700105
106
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800107
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a label detection function.

    The returned decorator appends the decorated function to
    `label_function_list` and hands the function back unchanged.  When
    both a non-empty `label` and a `label_list` are supplied, `label`
    is appended to `label_list` as well.

    @param label_function_list: List of label detecting functions to add
                                the decorated function to.
    @param label_list: List of detectable labels to add `label` to.
                       (Default: None)
    @param label: Label string that is detectable by the decorated
                  function.  (Default: None)
    @return the decorator.
    """
    def register(func):
        """
        @param func: The function to be added as a detector.
        @return `func` itself.
        """
        label_function_list.append(func)
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func

    return register
126
127
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700128class SiteHost(remote.RemoteHost):
129 """Chromium OS specific subclass of Host."""
130
131 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -0500132 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700133
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800134 # Time to wait for new kernel to be marked successful after
135 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700136 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700137
Richard Barnette03a0c132012-11-05 12:40:35 -0800138 # Timeout values (in seconds) associated with various Chrome OS
139 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700140 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800141 # In general, a good rule of thumb is that the timeout can be up
142 # to twice the typical measured value on the slowest platform.
143 # The times here have not necessarily been empirically tested to
144 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700145 #
146 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800147 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
148 # time to restart the netwowrk.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700149 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800150 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800151 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700152 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800153 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800154 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800155 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700156 # REBOOT_TIMEOUT: How long to wait for a reboot.
Richard Barnette03a0c132012-11-05 12:40:35 -0800157 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700158
159 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800160 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700161 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700162 USB_BOOT_TIMEOUT = 150
Chris Sosab76e0ee2013-05-22 16:55:41 -0700163
164 # We have a long timeout to ensure we don't flakily fail due to other
165 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
166 REBOOT_TIMEOUT = 300
167
Richard Barnette03a0c132012-11-05 12:40:35 -0800168 _INSTALL_TIMEOUT = 240
169
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800170 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
171 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
172 _USB_POWER_TIMEOUT = 5
173 _POWER_CYCLE_TIMEOUT = 10
174
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800175
Richard Barnette82c35912012-11-20 10:09:10 -0800176 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
177 'rpm_recovery_boards', type=str).split(',')
178
179 _MAX_POWER_CYCLE_ATTEMPTS = 6
180 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
181 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
182 'host[0-9]+')
183 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
184 'in_illuminance0_raw',
185 'illuminance0_input']
186 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
187 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800188 _DETECTABLE_LABELS = []
189 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
190 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700191
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800192 # Constants used in ping_wait_up() and ping_wait_down().
193 #
194 # _PING_WAIT_COUNT is the approximate number of polling
195 # cycles to use when waiting for a host state change.
196 #
197 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
198 # for arguments to the internal _ping_wait_for_status()
199 # method.
200 _PING_WAIT_COUNT = 40
201 _PING_STATUS_DOWN = False
202 _PING_STATUS_UP = True
203
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800204 # Allowed values for the power_method argument.
205
206 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
207 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
208 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
209 POWER_CONTROL_RPM = 'RPM'
210 POWER_CONTROL_SERVO = 'servoj10'
211 POWER_CONTROL_MANUAL = 'manual'
212
213 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
214 POWER_CONTROL_SERVO,
215 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800216
Simran Basi5e6339a2013-03-21 11:34:32 -0700217 _RPM_OUTLET_CHANGED = 'outlet_changed'
218
beeps687243d2013-07-18 15:29:27 -0700219 # pylint: disable=E1120
220 _NOTIFY_ADDRESS = global_config.global_config.get_config_value(
beepsdae65fd2013-07-26 16:24:41 -0700221 'SCHEDULER', 'notify_email_errors', default='')
beeps687243d2013-07-18 15:29:27 -0700222
223 _SENDER_ADDRESS = global_config.global_config.get_config_value(
224 'SCHEDULER', "notify_email_from", default=getpass.getuser())
225
226 _ERROR_EMAIL_SUBJECT_FORMAT = 'job_repo_url changed for host %s'
227 _ERROR_EMAIL_MSG_FORMAT = ('While verifying the job_repo_url on %(host)s '
228 'the devserver changed from %(old_devserver)s '
229 'to %(new_devserver)s. This might indicate a '
beepsdae65fd2013-07-26 16:24:41 -0700230 'delay in job with id: %(job_id)s, re-staging '
231 'artifacts took an additional %(stage_time)s '
232 'seconds.')
beeps687243d2013-07-18 15:29:27 -0700233
J. Richard Barnette964fba02012-10-24 17:34:29 -0700234 @staticmethod
def get_servo_arguments(args_dict):
    """Pick the servo-related options out of `args_dict`.

    Only the keys 'servo_host' and 'servo_port' are copied, and only
    when present; `args_dict` itself is not modified.  The intent is
    to provide standard argument processing from run_remote_tests
    for tests that require a servo to operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
                     arguments.
    @return a new dictionary holding the servo arguments found.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700259
J. Richard Barnette964fba02012-10-24 17:34:29 -0700260
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    The servo object is chosen as follows:
      * `_get_lab_servo(hostname)` is tried first, and any Servo it
        returns is used unconditionally.
      * Otherwise, if `servo_args` is not `None` (e.g. a control file
        that needs servo passed `servo_host`, `servo_port`, or both),
        a Servo is constructed from those keyword arguments.
      * Otherwise `self.servo` is `None`.

    @param hostname: name of the host; passed on to the superclass.
    @param servo_args: keyword arguments for `servo.Servo`, or `None`.
    @param args: extra positional arguments for the superclass.
    @param dargs: extra keyword arguments for the superclass.
    """
    super(SiteHost, self)._initialize(hostname=hostname, *args, **dargs)
    # self.env holds environment variable settings exported for
    # commands run on the host.  LIBC_FATAL_STDERR_ can be useful
    # for diagnosing certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._xmlrpc_proxy_map = {}
    self.servo = _get_lab_servo(hostname)
    if self.servo or servo_args is None:
        return
    self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700284
285
def get_repair_image_name(self):
    """Generate the repair image name from the global config.

    The name is the CROS `stable_build_pattern` setting formatted with
    this host's board (as reported by the AFE) and the CROS
    `stable_cros_version` setting.

    @returns the image name string.
    @raises AutoservError: if no board is known for the DUT.
    """
    config = global_config.global_config
    stable_version = config.get_config_value('CROS', 'stable_cros_version')
    build_pattern = config.get_config_value('CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
301
302
def _host_in_AFE(self):
    """Query the AFE for this host.

    @returns the result of the AFE `get_hosts` query for this host's
             hostname; callers treat a false result as "the AFE does
             not know this host".
    """
    known_hosts = self._AFE.get_hosts(hostname=self.hostname)
    return known_hosts
309
310
def lookup_job_repo_url(self):
    """Look up the job_repo_url attribute of this host in the AFE.

    @returns the job_repo_url stored for the host, or None if the AFE
             does not know the host or the host has no such attribute.
    """
    if self._host_in_AFE():
        # The host list is fetched again here rather than reusing the
        # result of the check above.
        matches = self._AFE.get_hosts(hostname=self.hostname)
        if matches:
            attributes = matches[0].attributes
            if ds_constants.JOB_REPO_URL in attributes:
                return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700324
325
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise the
    host is removed from every label whose name starts with the
    version prefix, and its job_repo_url attribute is reset to None.
    """
    if not self._host_in_AFE():
        return

    this_host = [self.hostname]
    version_labels = self._AFE.get_labels(
            name__startswith=ds_constants.VERSION_PREFIX,
            host__hostname=self.hostname)
    for version_label in version_labels:
        version_label.remove_hosts(hosts=this_host)

    self.update_job_repo_url(None, None)
340
341
def update_job_repo_url(self, devserver_url, image_name):
    """
    Update the job_repo_url host attribute and assert its value.

    When either argument is empty or None, the attribute is set to
    None.  After the update the attribute is read back from the AFE
    and compared against the value that was written.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None
    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)
    if self.lookup_job_repo_url() == repo_url:
        return
    raise error.AutoservError('Failed to update job_repo_url with %s, '
                              'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500359
360
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise a
    devserver is resolved for the image, the version label for the
    image is looked up (and created if it does not exist yet), the
    host is added to that label, and job_repo_url is updated to point
    at the resolved devserver.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0
    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=cros_label)
    label = (existing[0] if existing
             else self._AFE.create_label(name=cros_label))

    label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
382
383
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
    download and extract it. If the devserver embedded in the url is
    unresponsive, update the job_repo_url of the host after staging it on
    another devserver.

    The job_repo_url is not passed in; it is looked up through
    lookup_job_repo_url(). If the host has none, a warning is logged and
    nothing else happens.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Only used to report a job id in the notification email.

    @raises DevServerException: If we could not resolve a devserver.
    @raises AutoservError: If we're unable to save the new job_repo_url as
        a result of choosing a new devserver because the old one failed to
        respond to a health check.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    # Split the url into the devserver it names and the build it serves.
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
        job_repo_url)

    # Resolve a devserver for the build; it is compared against the one
    # embedded in the url further down.
    ds = dev_server.ImageServer.resolve(image_name)
    new_devserver_url = ds.url()

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    start_time = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - start_time

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = site_utils.ParseBuildName(
            image_name)[:3]
    except site_utils.ParseBuildNameException as e:
        # Build names that cannot be parsed are simply not reported.
        pass
    else:
        # Keep what lies between the '//' after the scheme and the last
        # ':' of the url, i.e. the host part when a port is present.
        # NOTE(review): for a url without a port the last ':' is the one
        # in the scheme, so this slice comes out empty -- confirm that
        # devserver urls always carry a port.
        new_devserver = new_devserver_url[
            new_devserver_url.find('/')+2:new_devserver_url.rfind(':')]
        stats_key = {
            'board': board,
            'build_type': build_type,
            'branch': branch,
            # Dots are replaced so that the host name stays a single
            # component of the dot-separated stat name.
            'devserver': new_devserver.replace('.', '_'),
        }
        stats.Gauge('verify_job_repo_url').send(
            '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
            stage_time)

    if ds.url() != devserver_url:

        # Since this is only to add traceability to devserver flakes we
        # make a best effort attempt at getting the job id by parsing the
        # tag.
        try:
            job_id = tko_utils.get_afe_job_id(tag)
        except ValueError:
            logging.debug('Could not determine job id from tag %s', tag)
            job_id = 'NA'

        error_dict = {
            'host': self.hostname,
            'old_devserver': devserver_url,
            'new_devserver': new_devserver_url,
            'job_id': job_id,
            'stage_time': stage_time,
        }
        # Notify by email; if the mail is rejected with SMTPDataError the
        # same message is logged as a warning instead.
        try:
            mail.send(self._SENDER_ADDRESS, self._NOTIFY_ADDRESS, '',
                      self._ERROR_EMAIL_SUBJECT_FORMAT % self.hostname,
                      self._ERROR_EMAIL_MSG_FORMAT % error_dict)
        except smtplib.SMTPDataError:
            logging.warning(self._ERROR_EMAIL_MSG_FORMAT, error_dict)

        # Point the host at the devserver that now has the artifacts.
        self.update_job_repo_url(ds.url(), image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400471
472
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage.  It is implemented
    by calling autoupdater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    To confirm that the update really rebuilt the stateful folders, a
    marker file is created in each of them beforehand; after the reboot
    none of the markers may be left.

    @param update_url: url of the image.  Not used by this method.
    @param force_update: Set to True to update the image even if the DUT
        is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False
    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
    for marker_path in marker_paths:
        # Run `test -f` on its own so that the command's exit status is
        # the status of the test: 0 means the marker is still there.
        # (Appending `; echo $?` made the exit status always that of
        # `echo`, so the check could never fail.)
        result = self.run('test -f %s' % marker_path, ignore_status=True)
        if result.exit_status == 0:
            # The marker survived, so this folder was not rebuilt.
            return False
    return True
517
518
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    Marks the DUT as a lab machine, then checks in turn that the DUT
    did not roll back to another kernel, that it runs the build that
    was meant to be installed, and that the active kernel gets marked
    as good within `_KERNEL_UPDATE_TIMEOUT` seconds.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.
    @raises autoupdater.ChromiumOSError: if any of the checks fails.

    """
    # The lab machine file is a marker that distinguishes this image
    # from other test images.  The autoreboot script depends on that
    # file, so it has to be started again once the file exists.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # Find out which kernel is active now.
    active_kernel, _ = updater.get_kernel_state()

    # A different active kernel means a bad build made us roll back.
    if expected_kernel and active_kernel != expected_kernel:
        # Dump some state first to make the rollback easier to debug.
        logging.debug('Dumping partition table.')
        self.run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self.run('crossystem --all')
        rollback_message = (
                'Build %s failed to boot on %s; system rolled back '
                'to previous build' % (updater.update_version,
                                       self.hostname))
        raise autoupdater.ChromiumOSError(rollback_message)

    # The installed build must be the one we asked for.
    if not updater.check_version_to_confirm_install():
        mismatch_message = (
                'Failed to update %s to build %s; found build '
                '%s instead' % (self.hostname,
                                updater.update_version,
                                updater.get_build_id()))
        raise autoupdater.ChromiumOSError(mismatch_message)

    def kernel_marked_good():
        """Tell whether the active kernel has been marked successful."""
        return (updater.get_kernel_tries(active_kernel) == 0
                and updater.get_kernel_success(active_kernel))

    # Wait for chromeos-setgoodkernel to run.
    try:
        utils.poll_for_condition(
                kernel_marked_good,
                exception=autoupdater.ChromiumOSError(),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except autoupdater.ChromiumOSError:
        # Use the state of system-services to tell the two possible
        # causes apart.
        services_status = self.run('status system-services').stdout
        if services_status == 'system-services start/running\n':
            event = ('update-engine failed to call '
                     'chromeos-setgoodkernel')
        else:
            event = 'Chrome failed to reach login screen'
        raise autoupdater.ChromiumOSError(
                'After update and reboot, %s '
                'within %d seconds' % (event,
                                       self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800576
577
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    A devserver is resolved for the image and asked to download it
    without waiting for the download to finish.

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       empty or None, the repair image name is used.
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    image_name = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    devserver.trigger_download(image_name, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (devserver.url(), image_name)
591
592
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    A devserver is resolved for the image and asked to stage the
    'test_image' artifact of the build.

    @param image_name: a name like lumpy-release/R27-3837.0.0; when
                       empty or None, the repair image name is used.
    @returns the URL of the test image, as reported by the devserver's
             `get_test_image_url`.
    """
    image_name = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    image_server.stage_artifacts(image_name, ['test_image'])
    test_image_url = image_server.get_test_image_url(image_name)
    return test_image_url
606
607
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install a build on the DUT through the autoupdater.

    A stateful update is attempted first; it is the updater's decision
    (via `self._try_stateful_update()`) whether one is applicable.  If
    the stateful update does not happen or raises, a full update is
    run, including a check of the kernel partition priorities.

    The host's cros-version labels and job_repo_url attribute are
    cleared before updating, and set again only if an update was
    actually applied.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            When empty, the url is derived from the `image` option of
            `self._parser`; failing that, and if `repair` is True, the
            repair image is staged through
            `self._stage_image_for_update()`.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Passed through to the updater; used by
            run_remote_test to allow people to use their local
            devserver. Default: False
    @param repair: Whether or not we are in repair mode.  In repair
            mode the DUT is rebooted and update-engine restarted up
            front, and force_update is set to True. Default: False
    @raises autoupdater.ChromiumOSError if no update url can be
            determined, or if after a full update the inactive kernel
            has lower priority than the active one.

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        requested_build = self._parser.options.image
        if not requested_build and not repair:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')
        if requested_build and requested_build.startswith('http://'):
            # The requested image is already a usable URL.
            update_url = requested_build
            logging.debug('update url is retrieved from requested_build'
                          ': %s', update_url)
        else:
            if requested_build:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
            else:
                # Repair mode without an explicit image.
                update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)

    if repair:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(
            update_url, host=self, local_devserver=local_devserver)
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()

    # Try the stateful update first; any failure here only means we
    # fall through to the full update below.
    updated = False
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        logging.exception(e)
        logging.warn('Failed to stateful update DUT, force to update.')

    inactive_kernel = None
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        for prep_cmd in ('rm -rf /var/lib/whitelist',
                         'touch /var/lib/whitelist',
                         'chmod -w /var/lib/whitelist',
                         'stop update-engine; start update-engine'):
            self.run(prep_cmd)

        if updater.run_update(force_update):
            updated = True
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # The freshly written (inactive) kernel must not have a
            # lower priority than the one we are running from.
            inactive_priority = updater.get_kernel_priority(inactive_kernel)
            active_priority = updater.get_kernel_priority(active_kernel)
            if inactive_priority < active_priority:
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            update_engine_log = '/var/log/update_engine.log'
            logging.info('Dumping %s', update_engine_log)
            self.run('cat %s' % update_engine_log)
            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    # Clean up any old autotest directories which may be lying around.
    stale_dirs = global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list)
    for stale_dir in stale_dirs:
        self.run('rm -rf ' + stale_dir)
729
730
def _get_board_from_afe(self):
    """Look up this host's board through the AFE.

    The lookup itself is delegated to
    `server_utils.get_board_from_afe()`, called with this host's
    hostname and AFE handle.

    @returns the value returned by `server_utils.get_board_from_afe()`
             (presumably the board name, or `None` when there is no
             single matching "board:<board>" label -- verify against
             that helper).
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800741
742
def get_build(self):
    """Look up the current build for this Host through the AFE.

    The lookup itself is delegated to
    `server_utils.get_build_from_afe()`, called with this host's
    hostname and AFE handle.

    @returns the value returned by `server_utils.get_build_from_afe()`
             (presumably the build, or None when it cannot be
             determined unambiguously -- verify against that helper).
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800752
753
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500754 def _install_repair(self):
755 """Attempt to repair this host using upate-engine.
756
757 If the host is up, try installing the DUT with a stable
758 "repair" version of Chrome OS as defined in the global_config
759 under CROS.stable_cros_version.
760
Scott Zawalski62bacae2013-03-05 10:40:32 -0500761 @raises AutoservRepairMethodNA if the DUT is not reachable.
762 @raises ChromiumOSError if the install failed for some reason.
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500763
764 """
765 if not self.is_up():
Scott Zawalski62bacae2013-03-05 10:40:32 -0500766 raise error.AutoservRepairMethodNA('DUT unreachable for install.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500767
768 logging.info('Attempting to reimage machine to repair image.')
769 try:
770 self.machine_install(repair=True)
Fang Dengd0672f32013-03-18 17:18:09 -0700771 except autoupdater.ChromiumOSError as e:
772 logging.exception(e)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500773 logging.info('Repair via install failed.')
Scott Zawalski62bacae2013-03-05 10:40:32 -0500774 raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500775
776
def servo_install(self, image_url=None):
    """Re-install the OS on the DUT through its servo.

    The sequence is:
      1) hand `image_url` to the servo's `install_recovery_image()`,
      2) wait for the DUT to come up from that image,
      3) run `chromeos-install --yes` on the DUT,
      4) power the DUT down with a long press, switch the USB key off,
         power it back on and wait for it to come up again.

    @param image_url: Passed as-is to `servo.install_recovery_image()`.

    @raises AutoservRepairFailure if the DUT does not come up within
            USB_BOOT_TIMEOUT seconds after step 1.
    @raises AutoservError if the DUT does not come up within
            BOOT_TIMEOUT seconds after the install.
    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()
    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
806
807
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700808 def _servo_repair_reinstall(self):
Scott Zawalski62bacae2013-03-05 10:40:32 -0500809 """Reinstall the DUT utilizing servo and a test image.
810
811 Re-install the OS on the DUT by:
812 1) installing a test image on a USB storage device attached to the Servo
813 board,
814 2) booting that image in recovery mode, and then
815 3) installing the image with chromeos-install.
816
Scott Zawalski62bacae2013-03-05 10:40:32 -0500817 @raises AutoservRepairMethodNA if the device does not have servo
818 support.
819
820 """
821 if not self.servo:
822 raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
823 'DUT has no servo support.')
824
825 logging.info('Attempting to recovery servo enabled device with '
826 'servo_repair_reinstall')
827
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700828 image_url = self.stage_image_for_servo()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500829 self.servo_install(image_url)
830
831
832 def _servo_repair_power(self):
833 """Attempt to repair DUT using an attached Servo.
834
835 Attempt to power on the DUT via power_long_press.
836
837 @raises AutoservRepairMethodNA if the device does not have servo
838 support.
839 @raises AutoservRepairFailure if the repair fails for any reason.
840 """
841 if not self.servo:
842 raise error.AutoservRepairMethodNA('Repair Power NA: '
843 'DUT has no servo support.')
844
845 logging.info('Attempting to recover servo enabled device by '
846 'powering it off and on.')
847 self.servo.get_power_state_controller().power_off()
848 self.servo.get_power_state_controller().power_on()
849 if self.wait_up(self.BOOT_TIMEOUT):
850 return
851
852 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800853
854
Richard Barnette82c35912012-11-20 10:09:10 -0800855 def _powercycle_to_repair(self):
856 """Utilize the RPM Infrastructure to bring the host back up.
857
858 If the host is not up/repaired after the first powercycle we utilize
859 auto fallback to the last good install by powercycling and rebooting the
860 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -0500861
862 @raises AutoservRepairMethodNA if the device does not support remote
863 power.
864 @raises AutoservRepairFailure if the repair fails for any reason.
865
Richard Barnette82c35912012-11-20 10:09:10 -0800866 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500867 if not self.has_power():
868 raise error.AutoservRepairMethodNA('Device does not support power.')
869
Richard Barnette82c35912012-11-20 10:09:10 -0800870 logging.info('Attempting repair via RPM powercycle.')
871 failed_cycles = 0
872 self.power_cycle()
873 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
874 failed_cycles += 1
875 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -0500876 raise error.AutoservRepairFailure(
877 'Powercycled host %s %d times; device did not come back'
878 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -0800879 self.power_cycle()
880 if failed_cycles == 0:
881 logging.info('Powercycling was successful first time.')
882 else:
883 logging.info('Powercycling was successful after %d failures.',
884 failed_cycles)
885
886
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead runs a fixed
    sequence of repair procedures:
      1. Re-install to a known stable image using auto-update
         (`_install_repair`).
      2. Power the DUT off and on via servo (`_servo_repair_power`).
      3. Re-install via the servo (`_servo_repair_reinstall`).
      4. Power-cycle via RPM (`_powercycle_to_repair`).

    Each procedure is followed by a call to `self.verify()`.  The
    first procedure for which both calls succeed ends the repair; any
    exception from either call is logged, remembered, and the next
    procedure is tried.

    @raises AutoservRepairTotalFailure if every procedure failed; the
            message lists the collected errors.
    """
    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_steps = (
        self._install_repair,
        self._servo_repair_power,
        self._servo_repair_reinstall,
        self._powercycle_to_repair,
    )
    failures = []
    for step in repair_steps:
        try:
            step()
            self.verify()
        except Exception as e:
            logging.warn('Failed to repair device: %s', e)
            failures.append(str(e))
        else:
            return

    summary = '\n'.join(failures)
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' % summary)
Richard Barnette82c35912012-11-20 10:09:10 -0800932
933
def close(self):
    """Close the host, then drop every XMLRPC connection it opened.

    The parent class's `close()` runs first; afterwards
    `self.xmlrpc_disconnect_all()` is called.
    """
    super(SiteHost, self).close()
    self.xmlrpc_disconnect_all()
937
938
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Nothing is done unless the AFE knows this host and the host
    carries the `_RPM_OUTLET_CHANGED` attribute.  Otherwise the host
    is powered on and the attribute is reset to None, even when
    powering on failed with a RemotePowerException.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
957
958
def cleanup(self):
    """Clean up the DUT by restarting its UI.

    Removes the file named by `constants.CLEANUP_LOGS_PAUSED_FILE`,
    then clears the login prompt state, restarts the ui job and waits
    for the login prompt.  If any of those steps raises
    AutotestRunError or AutoservRunError, the parent class's
    `cleanup()` is used instead.  Finally, hosts for which
    `has_power()` is true get `_cleanup_poweron()` called.
    """
    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method(cros_ui_module,
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method(cros_ui_module,
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warn('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700976
977
def reboot(self, **dargs):
    """Reboot the site host with Chrome OS specific defaults.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds.  This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.

    @param dargs Keyword arguments forwarded to the parent class's
                 `reboot()`.  `reboot_cmd` and `fastsync` are filled
                 in with defaults only when the caller did not supply
                 them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
993
994
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions, in this order:
      1. All conditions tested by the parent version of this
         function.
      2. Sufficient space in /mnt/stateful_partition.
      3. Sufficient space in /mnt/stateful_partition/encrypted.
      4. `update_engine_client --status` runs successfully.
      5. python on the DUT can import cPickle.

    The space requirements are read from the SERVER section of the
    global config, with defaults of 20.0 and 0.1 respectively.

    """
    super(SiteHost, self).verify_software()

    config = global_config.global_config
    space_checks = (
        ('/mnt/stateful_partition', 'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for mount_point, option, fallback_gb in space_checks:
        required_gb = config.get_config_value(
                'SERVER', option, type=float, default=fallback_gb)
        self.check_diskspace(mount_point, required_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001022
1023
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Any connection previously made for `port` is disconnected first.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect retries that call (on
    socket errors, XMLRPC protocol errors and bad HTTP status lines)
    for up to timeout_seconds.  If the call ultimately fails, the
    connection for `port` is torn down again and the exception
    propagates to the caller.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'
    @returns the `xmlrpclib.ServerProxy` talking to the server
        through a local ssh tunnel.

    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    server_started = False
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
        server_started = True
    finally:
        # Until the tunnel is recorded in _xmlrpc_proxy_map nobody
        # else can clean it up, so don't leak the ssh process if
        # starting the server failed.
        if not server_started and tunnel_proc.poll() is None:
            tunnel_proc.terminate()
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        finally:
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.xmlrpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001112
def xmlrpc_disconnect(self, port):
    """Disconnect from an XMLRPC server on the host.

    Terminates the remote XMLRPC server previously started for
    the given `port` (only when a `pkill` name was recorded for it),
    and terminates the local ssh tunnel created for the connection
    if it is still running.  This function does not directly alter
    the state of a previously returned XMLRPC client object; however
    disconnection will cause all subsequent calls to methods on the
    object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via `self.xmlrpc_connect()`

    @param port Port number passed to a previous call to
                `xmlrpc_connect()`
    """
    try:
        remote_name, tunnel_proc = self._xmlrpc_proxy_map[port]
    except KeyError:
        return

    if remote_name:
        # We use 'pkill' to find our target process rather than
        # a PID, because the host may have rebooted since
        # connecting, and we don't want to kill an innocent
        # process with the same PID.
        #
        # 'pkill' helpfully exits with status 1 if no target
        # process is found, for which run() will throw an
        # exception.  We don't want that, so we ignore the
        # status.
        self.run("pkill -f '%s'" % remote_name, ignore_status=True)

    tunnel_is_running = tunnel_proc.poll() is None
    if tunnel_is_running:
        tunnel_proc.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel_proc.pid, tunnel_proc.returncode)
    del self._xmlrpc_proxy_map[port]
1152 del self._xmlrpc_proxy_map[port]
1153
1154
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports.

    Calls `self.xmlrpc_disconnect()` once for every port currently
    recorded in `self._xmlrpc_proxy_map`.
    """
    # Iterate over a snapshot of the ports: xmlrpc_disconnect()
    # deletes entries from the map, and mutating a dict while
    # iterating over a live view of its keys raises RuntimeError.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
1159
1160
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    The host counts as answering when `utils.ping()` returns 0.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_val = utils.ping(self.hostname, tries=1, deadline=1)
    host_answered = (ping_val == 0)
    # XOR is true exactly when the two values disagree.
    return not (status ^ host_answered)
1171
1172 def _ping_wait_for_status(self, status, timeout):
1173 """Wait for the host to have a given status (UP or DOWN).
1174
1175 Status is checked by polling. Polling will not last longer
1176 than the number of seconds in `timeout`. The polling
1177 interval will be long enough that only approximately
1178 _PING_WAIT_COUNT polling cycles will be executed, subject
1179 to a maximum interval of about one minute.
1180
1181 @param status Waiting will stop immediately if `ping` of the
1182 host returns this status.
1183 @param timeout Poll for at most this many seconds.
1184 @return True iff the host status from `ping` matched the
1185 requested status at the time of return.
1186
1187 """
1188 # _ping_check_status() takes about 1 second, hence the
1189 # "- 1" in the formula below.
1190 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1191 end_time = time.time() + timeout
1192 while time.time() <= end_time:
1193 if self._ping_check_status(status):
1194 return True
1195 if poll_interval > 0:
1196 time.sleep(poll_interval)
1197
1198 # The last thing we did was sleep(poll_interval), so it may
1199 # have been too long since the last `ping`. Check one more
1200 # time, just to be sure.
1201 return self._ping_check_status(status)
1202
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001218
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001232
1233 def test_wait_for_sleep(self):
1234 """Wait for the client to enter low-power sleep mode.
1235
1236 The test for "is asleep" can't distinguish a system that is
1237 powered off; to confirm that the unit was asleep, it is
1238 necessary to force resume, and then call
1239 `test_wait_for_resume()`.
1240
1241 This function is expected to be called from a test as part
1242 of a sequence like the following:
1243
1244 ~~~~~~~~
1245 boot_id = host.get_boot_id()
1246 # trigger sleep on the host
1247 host.test_wait_for_sleep()
1248 # trigger resume on the host
1249 host.test_wait_for_resume(boot_id)
1250 ~~~~~~~~
1251
1252 @exception TestFail The host did not go to sleep within
1253 the allowed time.
1254 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001255 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001256 raise error.TestFail(
1257 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001258 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001259
1260
def test_wait_for_resume(self, old_boot_id):
    """Block until the client is back up after a low-power sleep.

    Pass in the boot id that `get_boot_id()` reported before the
    host went to sleep.  A resume keeps the boot id, so a different
    boot id afterwards is reported as a failure.

    See @ref test_wait_for_sleep for the expected calling sequence.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not come up within
                        `RESUME_TIMEOUT` seconds.
    @exception TestFail The host came up, but its boot id differs
                        from `old_boot_id`, indicating a reboot
                        rather than a sleep cycle.
    """
    came_up = self.wait_up(timeout=self.RESUME_TIMEOUT)
    if not came_up:
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host answered; make sure it is still the same boot.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, current_boot_id))
1291
1292
def test_wait_for_shutdown(self):
    """Block until the client stops answering ping, i.e. looks shut down.

    A unit that is merely asleep is indistinguishable from one that
    has shut down.  To confirm that the unit really was down, force
    a boot afterwards and then call `test_wait_for_boot()`.

    Intended to be called from a test in a sequence such as:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go down within
                        `SHUTDOWN_TIMEOUT` seconds.
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001318
1319
def test_wait_for_boot(self, old_boot_id=None):
    """Block until the client is back up after a cold boot.

    Pass in the boot id that `get_boot_id()` reported before the
    shutdown.  A real boot changes the boot id, so an unchanged boot
    id afterwards is reported as a failure.  When `old_boot_id` is
    omitted (or otherwise false), the boot id comparison is skipped.

    See @ref test_wait_for_shutdown for the expected calling
    sequence.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not come up within
                        `REBOOT_TIMEOUT` seconds.
    @exception TestFail The host came up, but its boot id still
                        equals `old_boot_id`, indicating that there
                        was no reboot.
    """
    came_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_up:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # Without a reference boot id there is nothing to compare against.
    if not old_boot_id:
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001349
1350
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname follows the naming format of RPM powered DUTs.

    @param hostname The hostname to check.

    @return A regular expression MatchObject if `hostname` matches
            `SiteHost._RPM_HOSTNAME_REGEX`, i.e. it follows the naming
            format defined for RPM powered DUTs in the lab; None if it
            does not.
    """
    rpm_hostname_match = re.match(SiteHost._RPM_HOSTNAME_REGEX, hostname)
    return rpm_hostname_match
Simran Basid5e5e272012-09-24 15:23:59 -07001360
1361
def has_power(self):
    """Report whether this host's name marks it as powered by an RPM.

    @return The result of `SiteHost.check_for_rpm_support()` for this
            host's hostname: a (true) regular expression MatchObject
            if the hostname follows the RPM naming format, or None if
            it does not.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
1369
1370
def _set_power(self, state, power_method):
    """Set the power state of the host via RPM, Servo or manual action.

    @param state Power state to apply to the DUT: 'ON' or 'OFF',
                 checked case-insensitively.
    @param power_method Which method of power control to use.
                 `POWER_CONTROL_SERVO` and `POWER_CONTROL_MANUAL`
                 select servo and manual control; any other value
                 is handled through the RPM.

    @exception TestError `state` is not one of the accepted states.
    @exception TestFail RPM control was selected, but `has_power()`
                        reports that this host has no RPM.
    """
    valid_states = ['ON', 'OFF']
    requested_state = state.upper()
    if requested_state not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        # Pause after toggling the servo power control.
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; only prompt through the log and wait.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Everything else goes through the RPM.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    # Set the `_RPM_OUTLET_CHANGED` host attribute before switching.
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, requested_state)
Simran Basid5e5e272012-09-24 15:23:59 -07001401
1402
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Switch this host's power off via RPM, Servo or manual action.

    @param power_method Which method of power control to use;
                        defaults to `POWER_CONTROL_RPM`.  Valid
                        values are the strings "RPM", "manual",
                        "servoj10".
    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001412
1413
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Switch this host's power on via RPM, Servo or manual action.

    @param power_method Which method of power control to use;
                        defaults to `POWER_CONTROL_RPM`.  Valid
                        values are the strings "RPM", "manual",
                        "servoj10".
    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1423
1424
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is performed as an
    explicit `power_off()`, a pause of `_POWER_CYCLE_TIMEOUT`
    seconds, and a `power_on()`.  For any other method a single
    'CYCLE' request is sent to the RPM.

    @param power_method Which method of power control to use;
                        defaults to `POWER_CONTROL_RPM`.  Valid
                        values are the strings "RPM", "manual",
                        "servoj10".
    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001440
1441
def get_platform(self):
    """Work out the platform label for this host from its firmware id.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # The fwid generally looks like {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the part before the
    # first '.' is used as the platform id.
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0]
    # Newer platforms carry a 'Google_' prefix that older ones lack.
    return platform_id.lower().replace('google_', '')
1455
1456
@label_decorator()
def get_board(self):
    """Work out the board label for this host from /etc/lsb-release.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, while
    # development devices report {board_name}-signed-{key_type}.
    # Only the leading component is kept, except for names containing
    # 'x86' (e.g. an 'x86-' prefix), where the first two are kept.
    name_parts = board.split('-')
    if 'x86' in board:
        board_name = '-'.join(name_parts[0:2])
    else:
        board_name = name_parts[0]
    return board_format_string % board_name
Simran Basic6f1f7a2012-10-16 10:47:46 -07001473
1474
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a light sensor.

    Searches `_LIGHTSENSOR_SEARCH_DIR` on the host for any of the
    file names listed in `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
1494
1495
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    Checks for the directory /sys/class/bluetooth/hci0 on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did
        # not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1511
1512
def get_labels(self):
    """Collect every automatic label that applies to this host.

    Calls each function in `_LABEL_FUNCTIONS` with this host, in
    order, and keeps the results that are true; label functions
    signal "does not apply" by returning a false value such as None.

    @returns a list of the labels produced for this host.
    """
    candidates = (label_function(self)
                  for label_function in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]