blob: 6ca107699b6cd927aa03d9b21d9df575b98ef13f [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
beeps687243d2013-07-18 15:29:27 -07006import getpass
Christopher Wiley0ed712b2013-04-09 15:25:12 -07007import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07008import logging
Dan Shi0f466e82013-02-22 15:44:58 -08009import os
Simran Basid5e5e272012-09-24 15:23:59 -070010import re
beeps687243d2013-07-18 15:29:27 -070011import smtplib
Christopher Wileyd78249a2013-03-01 13:05:31 -080012import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070015import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070016
J. Richard Barnette45e93de2012-04-11 17:24:15 -070017from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080018from autotest_lib.client.common_lib import error
19from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070020from autotest_lib.client.common_lib import mail
21from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070022from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080023from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080024from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080025from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070026from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070027from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070028from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050029from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070030from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
J. Richard Barnette75487572013-03-08 12:47:50 -080031from autotest_lib.server.cros.servo import servo
J. Richard Barnette45e93de2012-04-11 17:24:15 -070032from autotest_lib.server.hosts import remote
beeps687243d2013-07-18 15:29:27 -070033from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080034from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070035
36
J. Richard Barnettebe5ebcc2013-02-11 16:03:15 -080037def _make_servo_hostname(hostname):
38 host_parts = hostname.split('.')
39 host_parts[0] = host_parts[0] + '-servo'
40 return '.'.join(host_parts)
41
42
def _get_lab_servo(target_hostname):
    """Instantiate a Servo for |target_hostname| in the lab.

    Assuming that |target_hostname| is a device in the CrOS test
    lab, create and return a Servo object pointed at the servo
    attached to that DUT.  The servo in the test lab is assumed
    to already have servod up and running on it.

    This is best-effort: if the servo host is not in the lab zone, or
    constructing the Servo object fails, the failure is logged at
    debug level and `None` is returned.

    @param target_hostname: device whose servo we want to target.
    @return an appropriately configured Servo instance, or `None` if
            no lab servo could be reached.
    """
    servo_host = _make_servo_hostname(target_hostname)
    if not utils.host_is_in_lab_zone(servo_host):
        return None
    try:
        return servo.Servo(servo_host=servo_host)
    except Exception as e:  # pylint: disable=W0703
        # TODO(jrbarnette):  Long-term, if we can't get to
        # a servo in the lab, we want to fail, so we should
        # pass any exceptions along.  Short-term, we're not
        # ready to rely on servo, so we ignore failures.
        #
        # Only `Exception` is caught, so KeyboardInterrupt and
        # SystemExit still propagate instead of being swallowed.
        logging.debug('Ignoring failure to reach lab servo %s: %s',
                      servo_host, e)
        return None
65
66
def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line prefix used to reach Chrome OS hosts.

    This overrides the default make_ssh_command with a fixed set of
    options tuned for Chrome OS:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection
        every 180 seconds.  In conjunction with ServerAliveCountMax
        ensures that if the connection dies, Autotest will bail out
        quickly.  Originally tried 60 secs, but saw frequent job ABORTS
        where the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
        Host keys change with every new installation, don't waste
        memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    The command also always carries `-a -x`, StrictHostKeyChecking=no
    and BatchMode=yes.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection;
                formatted with %d.
    @param opts Additional options to the ssh command; inserted
                verbatim right after `-a -x`.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.
    @return The ssh command string, without a target host.
    """
    ssh_template = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                    ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                    ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                    ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                    ' -o Protocol=2 -l %s -p %d')
    substitutions = (opts, user, port)
    return ssh_template % substitutions
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700103
104
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800105
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label detection functions.

    The returned decorator appends the decorated function to
    `label_function_list` and, when both a truthy `label` and a
    `label_list` are supplied, appends `label` to `label_list`.  The
    decorated function itself is returned unchanged.

    @param label_function_list: List of label detecting functions to add
                                decorated function to.
    @param label_list: List of detectable labels to add detectable labels
                       to.  (Default: None)
    @param label: Label string that is detectable by this detection
                  function.  (Default: None)
    @return A decorator taking the detection function as its argument.
    """
    def register(func):
        """Record `func` (and its label, if any) and hand it back.

        @param func: The function to be added as a detector.
        @return `func`, unmodified.
        """
        label_function_list.append(func)
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func
    return register
124
125
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700126class SiteHost(remote.RemoteHost):
127 """Chromium OS specific subclass of Host."""
128
129 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -0500130 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700131
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800132 # Time to wait for new kernel to be marked successful after
133 # auto update.
Chris Masone163cead2012-05-16 11:49:48 -0700134 _KERNEL_UPDATE_TIMEOUT = 120
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700135
Richard Barnette03a0c132012-11-05 12:40:35 -0800136 # Timeout values (in seconds) associated with various Chrome OS
137 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700138 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800139 # In general, a good rule of thumb is that the timeout can be up
140 # to twice the typical measured value on the slowest platform.
141 # The times here have not necessarily been empirically tested to
142 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700143 #
144 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800145 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
146 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700147 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800148 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800149 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700150 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800151 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800152 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800153 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700154 # REBOOT_TIMEOUT: How long to wait for a reboot.
Richard Barnette03a0c132012-11-05 12:40:35 -0800155 # _INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700156
157 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -0800158 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -0700159 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -0700160 USB_BOOT_TIMEOUT = 150
Chris Sosab76e0ee2013-05-22 16:55:41 -0700161
162 # We have a long timeout to ensure we don't flakily fail due to other
163 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
164 REBOOT_TIMEOUT = 300
165
Richard Barnette03a0c132012-11-05 12:40:35 -0800166 _INSTALL_TIMEOUT = 240
167
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800168 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
169 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
170 _USB_POWER_TIMEOUT = 5
171 _POWER_CYCLE_TIMEOUT = 10
172
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800173
Richard Barnette82c35912012-11-20 10:09:10 -0800174 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
175 'rpm_recovery_boards', type=str).split(',')
176
177 _MAX_POWER_CYCLE_ATTEMPTS = 6
178 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
179 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
180 'host[0-9]+')
181 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
182 'in_illuminance0_raw',
183 'illuminance0_input']
184 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
185 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800186 _DETECTABLE_LABELS = []
187 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
188 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700189
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800190 # Constants used in ping_wait_up() and ping_wait_down().
191 #
192 # _PING_WAIT_COUNT is the approximate number of polling
193 # cycles to use when waiting for a host state change.
194 #
195 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
196 # for arguments to the internal _ping_wait_for_status()
197 # method.
198 _PING_WAIT_COUNT = 40
199 _PING_STATUS_DOWN = False
200 _PING_STATUS_UP = True
201
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800202 # Allowed values for the power_method argument.
203
204 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
205 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
206 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
207 POWER_CONTROL_RPM = 'RPM'
208 POWER_CONTROL_SERVO = 'servoj10'
209 POWER_CONTROL_MANUAL = 'manual'
210
211 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
212 POWER_CONTROL_SERVO,
213 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800214
Simran Basi5e6339a2013-03-21 11:34:32 -0700215 _RPM_OUTLET_CHANGED = 'outlet_changed'
216
beeps687243d2013-07-18 15:29:27 -0700217 # pylint: disable=E1120
218 _NOTIFY_ADDRESS = global_config.global_config.get_config_value(
219 'SCHEDULER', 'notify_email', default='')
220
221 _SENDER_ADDRESS = global_config.global_config.get_config_value(
222 'SCHEDULER', "notify_email_from", default=getpass.getuser())
223
224 _ERROR_EMAIL_SUBJECT_FORMAT = 'job_repo_url changed for host %s'
225 _ERROR_EMAIL_MSG_FORMAT = ('While verifying the job_repo_url on %(host)s '
226 'the devserver changed from %(old_devserver)s '
227 'to %(new_devserver)s. This might indicate a '
228 'delay in %(build)s, re-staging artifacts took '
229 'an additional %(stage_time)s seconds.')
230
@staticmethod
def get_servo_arguments(args_dict):
    """Extract servo options from `args_dict` and return the result.

    Only the 'servo_host' and 'servo_port' entries are copied; keys
    missing from `args_dict` are simply left out of the result.  The
    intent is to provide standard argument processing from
    run_remote_tests for tests that require a servo to operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.SiteHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.
    @return A new dictionary holding the servo-related entries.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700256
J. Richard Barnette964fba02012-10-24 17:34:29 -0700257
def _initialize(self, hostname, servo_args=None, *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    `self.servo` is chosen as follows: the lab servo lookup for
    `hostname` is tried first and its result is used when it is
    truthy.  Otherwise, if the caller supplied `servo_args` (e.g. a
    control file passing `servo_host`, `servo_port`, or both), a
    Servo is constructed from those arguments.  If neither applies,
    `self.servo` keeps the falsy result of the lab lookup.

    @param hostname: Name of the host; forwarded to the superclass.
    @param servo_args: Keyword arguments for `servo.Servo`, or `None`.
    """
    super(SiteHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._xmlrpc_proxy_map = {}

    self.servo = _get_lab_servo(hostname)
    if self.servo:
        return
    if servo_args is not None:
        self.servo = servo.Servo(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700281
282
def get_repair_image_name(self):
    """Generate an image name from variables in the global config.

    Reads `stable_cros_version` and `stable_build_pattern` from the
    CROS section of the global config and fills the pattern with the
    board obtained from `self._get_board_from_afe()` and the stable
    version.

    @returns a str of $board-version/$BUILD.
    @raises error.AutoservError if no board is found for the DUT.
    """
    config = global_config.global_config
    stable_version = config.get_config_value('CROS', 'stable_cros_version')
    build_pattern = config.get_config_value('CROS', 'stable_build_pattern')

    board = self._get_board_from_afe()
    if board is not None:
        return build_pattern % (board, stable_version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
298
299
Scott Zawalski62bacae2013-03-05 10:40:32 -0500300 def _host_in_AFE(self):
301 """Check if the host is an object the AFE knows.
302
303 @returns the host object.
304 """
305 return self._AFE.get_hosts(hostname=self.hostname)
306
307
def lookup_job_repo_url(self):
    """Look up the job_repo_url attribute of this host in the AFE.

    @returns the job_repo_url recorded for the host, or None if the
             AFE does not know the host or the host has no
             job_repo_url attribute.  No exception is raised for a
             missing attribute.
    """
    # _host_in_AFE() already returns the AFE's get_hosts() result for
    # this hostname; reuse it instead of issuing a second, identical
    # RPC whose answer could differ from the first.
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700321
322
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise the
    host is removed from every label whose name starts with the
    version prefix, and job_repo_url is reset through
    `update_job_repo_url(None, None)`.
    """
    if self._host_in_AFE():
        hostnames = [self.hostname]
        version_labels = self._AFE.get_labels(
                name__startswith=ds_constants.VERSION_PREFIX,
                host__hostname=self.hostname)

        for version_label in version_labels:
            version_label.remove_hosts(hosts=hostnames)

        self.update_job_repo_url(None, None)
337
338
def update_job_repo_url(self, devserver_url, image_name):
    """
    Update the job_repo_url host attribute and assert its value.

    When either argument is falsy the attribute is set to None;
    otherwise it is set to the package url built from both.  The
    attribute is then read back to confirm the AFE stored it.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    # Read the value back: the write is only trusted once the AFE
    # reports the same url.
    if self.lookup_job_repo_url() == repo_url:
        return
    raise error.AutoservError('Failed to update job_repo_url with %s, '
                              'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500356
357
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise the
    host is added to the version label for `image_name` (the label is
    created if the AFE has none by that name) and job_repo_url is
    pointed at the devserver resolved for the image.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0
    """
    if not self._host_in_AFE():
        return

    version_label_name = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    devserver_url = image_server.url()

    matching = self._AFE.get_labels(name=version_label_name)
    label = (matching[0] if matching
             else self._AFE.create_label(name=version_label_name))

    label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
379
380
def verify_job_repo_url(self):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url
    "http://lmn.cd.ab.xyx:8080/static/lumpy-release/R29-4279.0.0/autotest/packages"
    claims to have the autotest package for lumpy-release/R29-4279.0.0.
    This method resolves a devserver for that build, stages the
    autotest artifacts on it and reports the staging time as a stat.
    If the resolved devserver differs from the one embedded in the
    url, a notification email is sent and the job_repo_url of the host
    is updated to point at the newly resolved devserver.

    Returns without doing anything (beyond a warning) when the host has
    no job_repo_url.

    @raises DevServerException: If we could not resolve a devserver
        (presumably raised by the dev_server module; not visible here).
    @raises AutoservError: If we're unable to save the new job_repo_url
        after choosing a different devserver.
    """
    current_repo_url = self.lookup_job_repo_url()
    if not current_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', current_repo_url)
    old_devserver_url, image_name = (
            tools.get_devserver_build_from_package_url(current_repo_url))

    ds = dev_server.ImageServer.resolve(image_name)
    new_devserver_url = ds.url()

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    staging_began = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - staging_began

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = site_utils.ParseBuildName(
                image_name)[:3]
    except site_utils.ParseBuildNameException:
        # Unparseable build names simply produce no stat.
        pass
    else:
        # Slice out what sits between the '//' of the scheme and the
        # last ':' of the url, i.e. the host part of "scheme://host:port".
        host_start = new_devserver_url.find('/') + 2
        host_end = new_devserver_url.rfind(':')
        new_devserver = new_devserver_url[host_start:host_end]
        stats_key = {
            'board': board,
            'build_type': build_type,
            'branch': branch,
            'devserver': new_devserver.replace('.', '_'),
        }
        gauge = stats.Gauge('verify_job_repo_url')
        gauge.send(
                '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
                stage_time)

    if ds.url() != old_devserver_url:
        error_dict = {
            'host': self.hostname,
            'old_devserver': old_devserver_url,
            'new_devserver': new_devserver_url,
            'build': image_name,
            'stage_time': stage_time,
        }
        subject = self._ERROR_EMAIL_SUBJECT_FORMAT % self.hostname
        try:
            mail.send(self._SENDER_ADDRESS, self._NOTIFY_ADDRESS, '',
                      subject,
                      self._ERROR_EMAIL_MSG_FORMAT % error_dict)
        except smtplib.SMTPDataError:
            # If the mail cannot be delivered, keep the same message in
            # the log instead.
            logging.warning(self._ERROR_EMAIL_MSG_FORMAT, error_dict)

        self.update_job_repo_url(ds.url(), image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400456
457
Dan Shi0f466e82013-02-22 15:44:58 -0800458 def _try_stateful_update(self, update_url, force_update, updater):
459 """Try to use stateful update to initialize DUT.
460
461 When DUT is already running the same version that machine_install
462 tries to install, stateful update is a much faster way to clean up
463 the DUT for testing, compared to a full reimage. It is implemeted
464 by calling autoupdater.run_update, but skipping updating root, as
465 updating the kernel is time consuming and not necessary.
466
467 @param update_url: url of the image.
468 @param force_update: Set to True to update the image even if the DUT
469 is running the same version.
470 @param updater: ChromiumOSUpdater instance used to update the DUT.
471 @returns: True if the DUT was updated with stateful update.
472
473 """
474 if not updater.check_version():
475 return False
476 if not force_update:
477 logging.info('Canceling stateful update because the new and '
478 'old versions are the same.')
479 return False
480 # Following folders should be rebuilt after stateful update.
481 # A test file is used to confirm each folder gets rebuilt after
482 # the stateful update.
483 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
484 test_file = '.test_file_to_be_deleted'
485 for folder in folders_to_check:
486 touch_path = os.path.join(folder, test_file)
487 self.run('touch %s' % touch_path)
488
489 if not updater.run_update(force_update=True, update_root=False):
490 return False
491
492 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700493 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800494 check_file_cmd = 'test -f %s; echo $?'
495 for folder in folders_to_check:
496 test_file_path = os.path.join(folder, test_file)
497 result = self.run(check_file_cmd % test_file_path,
498 ignore_status=True)
499 if result.exit_status == 1:
500 return False
501 return True
502
503
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    Marks the DUT as a lab machine, then checks in order: that the
    active kernel is the expected one (no rollback), that the installed
    build is the one requested, and that the active kernel gets marked
    successful within _KERNEL_UPDATE_TIMEOUT seconds.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
        or `None` to skip rollback checking.
    @raises autoupdater.ChromiumOSError if any of the checks fails.
    """
    # Touch the lab machine file to leave a marker that
    # distinguishes this image from other test images.
    # Afterwards, we must re-run the autoreboot script because
    # it depends on the _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # Figure out the newly active kernel.
    active_kernel, _ = updater.get_kernel_state()

    # Check for rollback due to a bad build.
    rolled_back = expected_kernel and active_kernel != expected_kernel
    if rolled_back:
        # Print out some information to make it easier to debug
        # the rollback.
        logging.debug('Dumping partition table.')
        self.run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self.run('crossystem --all')
        raise autoupdater.ChromiumOSError(
                'Build %s failed to boot on %s; system rolled back '
                'to previous build' % (updater.update_version,
                                       self.hostname))

    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        raise autoupdater.ChromiumOSError(
                'Failed to update %s to build %s; found build '
                '%s instead' % (self.hostname,
                                updater.update_version,
                                updater.get_build_id()))

    # Make sure chromeos-setgoodkernel runs.
    try:
        utils.poll_for_condition(
                lambda: (updater.get_kernel_tries(active_kernel) == 0
                         and updater.get_kernel_success(active_kernel)),
                exception=autoupdater.ChromiumOSError(),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except autoupdater.ChromiumOSError:
        # Use the state of system-services to pick the more precise of
        # the two failure descriptions.
        services_status = self.run('status system-services').stdout
        if services_status == 'system-services start/running\n':
            event = ('update-engine failed to call '
                     'chromeos-setgoodkernel')
        else:
            event = ('Chrome failed to reach login screen')
        raise autoupdater.ChromiumOSError(
                'After update and reboot, %s '
                'within %d seconds' % (event,
                                       self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800561
562
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    When no image name is given, the repair image name from
    `get_repair_image_name()` is used.  The download is triggered
    with synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)

    image_server = dev_server.ImageServer.resolve(build)
    image_server.trigger_download(build, synchronous=False)

    url_pattern = tools.image_url_pattern()
    return url_pattern % (image_server.url(), build)
576
577
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    When no image name is given, the repair image name from
    `get_repair_image_name()` is used.  The 'test_image' artifact of
    the build is staged on the resolved devserver.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns the value of the devserver's `get_test_image_url()` for
        the build.
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)

    image_server = dev_server.ImageServer.resolve(build)
    image_server.stage_artifacts(build, ['test_image'])
    return image_server.get_test_image_url(build)
591
592
def _resolve_update_url(self, repair):
    """Work out the update URL when the caller did not supply one.

    The build requested on the command line (the parsed `image`
    option) takes precedence.  A value starting with 'http://' is
    used verbatim as the update URL; any other value is treated as
    a build name and staged via `_stage_image_for_update()`.  When
    no build was requested and `repair` is set, the default image
    selected by `_stage_image_for_update()` is staged instead.

    @param repair: Whether or not we are in repair mode.
    @returns The update URL to install from.
    @raises autoupdater.ChromiumOSError if no build was requested
            and we are not in repair mode.

    """
    requested_build = self._parser.options.image
    if requested_build:
        if requested_build.startswith('http://'):
            logging.debug('update url is retrieved from requested_build'
                          ': %s', requested_build)
            return requested_build
        # Try to stage any build that does not start with
        # http:// on the devservers defined in
        # global_config.ini.
        update_url = self._stage_image_for_update(requested_build)
        logging.debug('Build staged, and update_url is set to: %s',
                      update_url)
        return update_url

    if repair:
        update_url = self._stage_image_for_update()
        logging.debug('Build staged, and update_url is set to: %s',
                      update_url)
        return update_url

    raise autoupdater.ChromiumOSError(
            'Update failed. No update URL provided.')


def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).  Both are set again only after a successful
    update.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None and repair is True we will install the
            stable image listed in global_config under
            CROS.stable_cros_version.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @raises autoupdater.ChromiumOSError

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        update_url = self._resolve_update_url(repair)

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        # Deliberately broad: any stateful-update failure falls back to
        # the full update below instead of aborting the install.
        logging.exception(e)
        logging.warning('Failed to stateful update DUT, force to update.')

    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('touch /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            update_engine_log = '/var/log/update_engine.log'
            logging.info('Dumping %s', update_engine_log)
            self.run('cat %s' % update_engine_log)
            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    # Clean up any old autotest directories which may be lying around.
    for path in global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list):
        self.run('rm -rf ' + path)
714
715
def _get_board_from_afe(self):
    """Look up this host's board name from its AFE labels.

    The lookup is delegated to `server_utils.get_board_from_afe()`,
    keyed by this host's hostname.  The board is taken from a host
    label of the form "board:<board>"; `None` is returned unless
    exactly one such label exists.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800726
727
def get_build(self):
    """Return the build currently recorded for this Host in the AFE.

    The lookup is delegated to `server_utils.get_build_from_afe()`,
    which inspects this host's labels in the AFE.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    afe_build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return afe_build
Richard Barnette82c35912012-11-20 10:09:10 -0800737
738
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    When the host is reachable, reinstall it through
    `machine_install(repair=True)`, which installs the stable
    "repair" version of Chrome OS defined in the global_config
    under CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    reachable = self.is_up()
    if not reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the failure in the log, then let the caller see it.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500760
761
def servo_install(self, image_url=None):
    """Re-install the OS on the DUT with the help of Servo.

    The sequence is:
    1) install a test image on the USB storage device attached to
       the Servo board and boot it in recovery mode,
    2) run chromeos-install from that image, and then
    3) power the DUT off, disconnect the USB key, and power the DUT
       back on so that it boots the freshly installed image.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.
    """
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=self.USB_BOOT_TIMEOUT)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                self.USB_BOOT_TIMEOUT)

    self.run('chromeos-install --yes', timeout=self._INSTALL_TIMEOUT)
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()
    booted_installed_image = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not booted_installed_image:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
791
792
def _servo_repair_reinstall(self):
    """Reinstall the DUT utilizing servo and a test image.

    Stages an image with `stage_image_for_servo()` and hands its
    URL to `servo_install()`, which re-installs the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the
       Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_image_url = self.stage_image_for_servo()
    self.servo_install(staged_image_url)
815
816
def _servo_repair_power(self):
    """Attempt to repair DUT using an attached Servo.

    Powers the DUT off and back on through the servo's power state
    controller, then waits up to BOOT_TIMEOUT for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()
    came_back = self.wait_up(self.BOOT_TIMEOUT)
    if not came_back:
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800838
839
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power-cycled and given BOOT_TIMEOUT seconds to come
    up.  If it does not, the cycle is repeated until the host is up
    or _MAX_POWER_CYCLE_ATTEMPTS cycles have failed; repeated cycling
    relies on auto fallback to the last good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host is still down after the
            maximum number of power cycles.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
870
871
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    The following procedures are attempted, in order, until one of
    them completes and a subsequent `self.verify()` passes:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If there's a servo for the DUT, try to re-install via
         the servo.
      4. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Any exception raised by a procedure, or by the `self.verify()`
    call that follows it, is logged and collected, and the next
    procedure is tried.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT; the message lists every collected failure.
    """
    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._install_repair, self._servo_repair_power,
                    self._servo_repair_reinstall,
                    self._powercycle_to_repair]
    errors = []
    for repair_func in repair_funcs:
        try:
            repair_func()
            self.verify()
            return
        except Exception as e:
            # Deliberately broad: a failed method must not stop the
            # remaining repair methods from being tried.
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -0800917
918
def close(self):
    """Close the host and tear down every XMLRPC connection.

    The parent class's `close()` runs first.  The XMLRPC proxies
    opened through `xmlrpc_connect()` are disconnected afterwards
    even if the parent's `close()` raises, so that their local ssh
    tunnel processes are not left behind.
    """
    try:
        super(SiteHost, self).close()
    finally:
        self.xmlrpc_disconnect_all()
922
923
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE knows this host and the host carries
    the _RPM_OUTLET_CHANGED attribute.  Otherwise the host is powered
    on and the attribute is cleared in the AFE; a failure to power on
    is logged rather than raised.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts:
        return
    if self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    # Clear the marker whether or not the power-on succeeded.
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
942
943
def cleanup(self):
    """Clean up the DUT after a test by restarting its UI.

    Removes the file named by `constants.CLEANUP_LOGS_PAUSED_FILE`,
    then clears the login prompt state, restarts the `ui` job and
    waits for the login prompt.  If any of those steps fails with an
    Autotest/Autoserv run error, the parent class's cleanup is used
    instead.  Finally, if the host has remote power, power is ensured
    to be on via `_cleanup_poweron()`.
    """
    client_at = autotest.Autotest(self)
    self.run('rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE)
    try:
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_clear_login_prompt_state')
        self.run('restart ui')
        client_at.run_static_method('autotest_lib.client.cros.cros_ui',
                                    '_wait_for_login_prompt')
    except (error.AutotestRunError, error.AutoservRunError):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(SiteHost, self).cleanup()
    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700961
962
def reboot(self, **dargs):
    """Reboot the site host with Chrome OS specific defaults.

    The more generic RemoteHost.reboot() performs sync and sleeps
    for 5 seconds.  This is not necessary for Chrome OS devices as
    the sync should be finished in a short time during the reboot
    command.  Unless the caller overrides them, this method supplies
    its own `reboot_cmd` and sets `fastsync` before delegating to the
    parent class.

    @param dargs Keyword arguments passed on to the parent class's
            reboot(); caller-supplied `reboot_cmd` and `fastsync`
            values are left untouched.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(SiteHost, self).reboot(**dargs)
978
979
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. `update_engine_client --status` runs successfully.
     5. python on the DUT is able to import cPickle.

    The disk space thresholds are read from the SERVER section of
    the global config, defaulting to 20.0 and 0.1 respectively.

    """
    super(SiteHost, self).verify_software()

    config = global_config.global_config
    stateful_gb = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float,
            default=20.0)
    self.check_diskspace('/mnt/stateful_partition', stateful_gb)

    encrypted_gb = config.get_config_value(
            'SERVER', 'gb_encrypted_diskspace_required', type=float,
            default=0.1)
    self.check_diskspace('/mnt/stateful_partition/encrypted', encrypted_gb)

    self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001007
1008
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Any connection previously made for `port` is disconnected
    first.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready.  If the readiness check still fails once
    retrying gives up, the connection is torn down and the failure
    is propagated to the caller.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
            terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
            method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
            for the server to become 'ready.'
    @returns An `xmlrpclib.ServerProxy` that reaches the server
            through a local ssh tunnel.

    """
    self.xmlrpc_disconnect(port)

    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    local_port = utils.get_unused_port()
    tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    ssh_cmd = make_ssh_command(opts=tunnel_options)
    tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel_proc = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started XMLRPC tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel_proc.pid)

    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    server_started = False
    try:
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
        server_started = True
    finally:
        if not server_started:
            # The tunnel is not yet recorded in _xmlrpc_proxy_map, so
            # nothing else would ever terminate it; do it here.
            tunnel_proc.terminate()
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    self._xmlrpc_proxy_map[port] = (command_name, tunnel_proc)
    rpc_url = 'http://localhost:%d' % local_port
    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        finally:
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.xmlrpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001097
1098 def xmlrpc_disconnect(self, port):
1099 """Disconnect from an XMLRPC server on the host.
1100
1101 Terminates the remote XMLRPC server previously started for
1102 the given `port`. Also closes the local ssh tunnel created
1103 for the connection to the host. This function does not
1104 directly alter the state of a previously returned XMLRPC
1105 client object; however disconnection will cause all
1106 subsequent calls to methods on the object to fail.
1107
1108 This function does nothing if requested to disconnect a port
1109 that was not previously connected via `self.xmlrpc_connect()`
1110
1111 @param port Port number passed to a previous call to
1112 `xmlrpc_connect()`
1113 """
1114 if port not in self._xmlrpc_proxy_map:
1115 return
1116 entry = self._xmlrpc_proxy_map[port]
1117 remote_name = entry[0]
1118 tunnel_proc = entry[1]
1119 if remote_name:
1120 # We use 'pkill' to find our target process rather than
1121 # a PID, because the host may have rebooted since
1122 # connecting, and we don't want to kill an innocent
1123 # process with the same PID.
1124 #
1125 # 'pkill' helpfully exits with status 1 if no target
1126 # process is found, for which run() will throw an
Simran Basid5e5e272012-09-24 15:23:59 -07001127 # exception. We don't want that, so we the ignore
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001128 # status.
1129 self.run("pkill -f '%s'" % remote_name, ignore_status=True)
1130
1131 if tunnel_proc.poll() is None:
1132 tunnel_proc.terminate()
1133 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
1134 else:
1135 logging.debug('Tunnel pid %d terminated early, status %d',
1136 tunnel_proc.pid, tunnel_proc.returncode)
1137 del self._xmlrpc_proxy_map[port]
1138
1139
def xmlrpc_disconnect_all(self):
    """Disconnect all known XMLRPC proxy ports."""
    # xmlrpc_disconnect() deletes entries from the map, so iterate
    # over a snapshot of the ports rather than the live mapping.
    for port in list(self._xmlrpc_proxy_map):
        self.xmlrpc_disconnect(port)
1144
1145
def _ping_check_status(self, status):
    """Ping the host once, and return whether it has a given status.

    The host counts as answering when `utils.ping()` returns 0.

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_val = utils.ping(self.hostname, tries=1, deadline=1)
    host_answered = (ping_val == 0)
    # XOR is truthy exactly when the two values disagree.
    differs = status ^ host_answered
    return not differs
1156
def _ping_wait_for_status(self, status, timeout):
    """Wait for the host to have a given status (UP or DOWN).

    Status is checked by polling.  Polling will not last longer
    than the number of seconds in `timeout`.  The polling
    interval will be long enough that only approximately
    _PING_WAIT_COUNT polling cycles will be executed, subject
    to a maximum interval of about one minute.

    @param status Waiting will stop immediately if `ping` of the
                  host returns this status.
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    # _ping_check_status() takes about 1 second, hence the
    # "- 1" in the formula below.
    cycle_seconds = int(timeout / self._PING_WAIT_COUNT)
    pause = min(cycle_seconds, 60) - 1
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            break
        if self._ping_check_status(status):
            return True
        if pause > 0:
            time.sleep(pause)

    # The last thing we did was sleep(pause), so it may
    # have been too long since the last `ping`.  Check one more
    # time, just to be sure.
    return self._ping_check_status(status)
1187
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  This method should only be used
    if the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001203
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001217
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    The host is considered asleep once it stops answering `ping`.
    That check can't distinguish a system that is powered off; to
    confirm that the unit was asleep, it is necessary to force
    resume, and then call `test_wait_for_resume()`.

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001244
1245
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    The `old_boot_id` parameter should be the value from
    `get_boot_id()` obtained prior to entering sleep mode.  A
    `TestFail` exception is raised if the boot id changes.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    resumed = self.wait_up(timeout=self.RESUME_TIMEOUT)
    if not resumed:
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # A changed boot id means the host rebooted instead of resuming.
    new_boot_id = self.get_boot_id()
    if new_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, new_boot_id))
1276
1277
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    The host is considered shut down once it stops answering
    `ping`.  That check can't distinguish a system that is merely
    asleep; to confirm that the unit was down, it is necessary to
    force boot, and then call test_wait_for_boot().

    This function is expected to be called from a test as part
    of a sequence like the following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001303
1304
def test_wait_for_boot(self, old_boot_id=None):
    """Block until the client has booted from cold power.

    When `old_boot_id` (a value from `get_boot_id()` taken before
    the shut down) is supplied, the boot id seen after the client
    is back up must differ from it; otherwise a `TestFail` is
    raised.  The comparison is skipped when `old_boot_id` is
    omitted or otherwise falsy.

    See @ref test_wait_for_shutdown for the expected calling
    sequence.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the time
                        given by `REBOOT_TIMEOUT`.
    @exception TestFail The host responded, but the boot id is
                        unchanged, so there was no reboot.
    """
    is_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not is_up:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # Short-circuit keeps get_boot_id() from being called when the
    # caller did not ask for the boot id comparison.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001334
1335
@staticmethod
def check_for_rpm_support(hostname):
    """Check whether a hostname matches the RPM-powered naming format.

    The hostname is matched, anchored at its start, against
    `SiteHost._RPM_HOSTNAME_REGEX`.

    @param hostname The hostname to test.

    @return None if the hostname does not follow the naming format
            defined for RPM powered DUTs in the lab.  If it does
            follow the format, the regular expression MatchObject
            is returned instead.
    """
    rpm_pattern = SiteHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001345
1346
def has_power(self):
    """Report whether this host's name marks it as RPM powered.

    This simply applies `check_for_rpm_support()` to
    `self.hostname`.

    @return A truthy regular expression MatchObject if this host's
            name follows the naming format for RPM powered DUTs,
            None otherwise.
    """
    rpm_match = SiteHost.check_for_rpm_support(self.hostname)
    return rpm_match
1354
1355
def _set_power(self, state, power_method):
    """Switch power to the host on or off via RPM, Servo or manually.

    @param state The power state to put the DUT in; 'ON' or 'OFF',
                 compared case-insensitively.
    @param power_method Specifies which method of power control to
                        use.  Valid values are the strings "RPM",
                        "manual", "servoj10".  Any value other than
                        the servo and manual ones is handled through
                        the RPM.

    @exception TestError `state` is neither 'ON' nor 'OFF'.
    @exception TestFail The RPM path was selected but this host is
                        not RPM powered (see has_power()).
    """
    valid_states = ['ON', 'OFF']
    requested = state.upper()
    if requested not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        # Pause after toggling the servo power control.
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; the operator gets a fixed time
        # window to change the AC power by hand.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Everything else goes through the RPM.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    # Flag on the AFE host record that the outlet state was changed
    # before actually asking the RPM to change it.
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, requested)
Simran Basid5e5e272012-09-24 15:23:59 -07001386
1387
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Cut power to this host via RPM, Servo or manual intervention.

    Thin wrapper that asks `_set_power()` for the 'OFF' state.

    @param power_method Specifies which method of power control to
                        use.  By default "RPM" will be used.  Valid
                        values are the strings "RPM", "manual",
                        "servoj10".
    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001397
1398
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Restore power to this host via RPM, Servo or manual intervention.

    Thin wrapper that asks `_set_power()` for the 'ON' state.

    @param power_method Specifies which method of power control to
                        use.  By default "RPM" will be used.  Valid
                        values are the strings "RPM", "manual",
                        "servoj10".
    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1408
1409
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host: OFF, then ON.

    For the servo and manual methods the cycle is sequenced here
    as power_off(), a pause of `_POWER_CYCLE_TIMEOUT`, then
    power_on().  For any other method a single 'CYCLE' request is
    sent to the RPM.

    @param power_method Specifies which method of power control to
                        use.  By default "RPM" will be used.  Valid
                        values are the strings "RPM", "manual",
                        "servoj10".
    """
    locally_sequenced = (self.POWER_CONTROL_SERVO,
                         self.POWER_CONTROL_MANUAL)
    if power_method not in locally_sequenced:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001425
1426
def get_platform(self):
    """Determine the correct platform label for this host.

    The platform is derived from the firmware id (fwid) reported by
    crossystem.  The fwid generally follows the format
    {platform}.{firmware version}, for example Alex.X.YYY.Z or
    Google_Alex.X.YYY.Z; the part before the first '.' is taken,
    lower-cased, and a leading 'google_' is dropped.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    platform = crossystem.fwid().split('.')[0].lower()
    # Newer platforms start with 'Google_' while the older ones do
    # not.  Only a leading prefix is stripped, so an id that merely
    # contains 'google_' further in is left intact.
    google_prefix = 'google_'
    if platform.startswith(google_prefix):
        platform = platform[len(google_prefix):]
    return platform
1440
1441
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in
    /etc/lsb-release on the host and prefixed with
    `ds_constants.BOARD_PREFIX`.

    @returns a string representing this host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    raw_board = lsb_release['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    # Devices in the lab generally have the correct board name but
    # our own development devices have
    # {board_name}-signed-{key_type}.  Names containing 'x86' keep
    # their first two dash-separated pieces (e.g. the 'x86-' lead),
    # all others keep only the first.
    pieces = raw_board.split('-')
    if 'x86' in raw_board:
        kept = pieces[0:2]
    else:
        kept = pieces[0:1]
    return label_format % '-'.join(kept)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001458
1459
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host has a light sensor.

    Searches `_LIGHTSENSOR_SEARCH_DIR` on the host (following
    symlinks, at most 4 levels deep) for any path matching one of
    `_LIGHTSENSOR_FILES`.

    @returns the string 'lightsensor' if this host has a lightsensor
             or None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # Stderr_tee is set to None as there can be a symlink loop;
        # the command still executes correctly, with a few messages
        # printed to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
1479
1480
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host has bluetooth.

    The check is whether the directory /sys/class/bluetooth/hci0
    exists on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    probe_cmd = 'test -d /sys/class/bluetooth/hci0'
    try:
        self.run(probe_cmd)
    except error.AutoservRunError:
        # test exited with a return code of 1, meaning the directory
        # did not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1496
1497
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in `_LABEL_FUNCTIONS` is called with this
    host, in order, and each truthy result is collected.

    @returns a list with the truthy results of the label functions,
             in the order the functions are listed.
    """
    detected = (detect(self) for detect in self._LABEL_FUNCTIONS)
    return [label for label in detected if label]