blob: d55ff71bb33e9617eeee71b12834dac136cbd335 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Fang Deng96667ca2013-08-01 17:46:18 -070028from autotest_lib.server.hosts import abstract_ssh
Fang Deng5d518f42013-08-02 14:04:32 -070029from autotest_lib.server.hosts import servo_host
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
beeps32a63082013-08-22 14:02:29 -070034try:
35 import jsonrpclib
36except ImportError:
37 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070038
Fang Dengd1c2b732013-08-20 12:59:46 -070039
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
43
44
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers functions as label detectors.

    @param label_function_list: List that each decorated function is
                                appended to.
    @param label_list: List that collects the detectable label strings.
                       (Default: None)
    @param label: Label string that the decorated function can detect.
                  (Default: None)
    @returns A decorator that records the function it is applied to and
             hands that same function back unchanged.
    """
    def add_func(func):
        """Record `func` (and its label, if any) and return it untouched.

        @param func: The function to be added as a detector.
        """
        label_function_list.append(func)
        # The label is only recorded when there is both a non-empty
        # label and a list to put it in.
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func
    return add_func
63
64
Fang Deng0ca40e22013-08-27 17:47:44 -070065class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070066 """Chromium OS specific subclass of Host."""
67
68 _parser = autoserv_parser.autoserv_parser
Scott Zawalski62bacae2013-03-05 10:40:32 -050069 _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
J. Richard Barnette45e93de2012-04-11 17:24:15 -070070
Richard Barnette03a0c132012-11-05 12:40:35 -080071 # Timeout values (in seconds) associated with various Chrome OS
72 # state changes.
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070073 #
Richard Barnette0c73ffc2012-11-19 15:21:18 -080074 # In general, a good rule of thumb is that the timeout can be up
75 # to twice the typical measured value on the slowest platform.
76 # The times here have not necessarily been empirically tested to
77 # meet this criterion.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070078 #
79 # SLEEP_TIMEOUT: Time to allow for suspend to memory.
Richard Barnette0c73ffc2012-11-19 15:21:18 -080080 # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
81 # time to restart the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070082 # BOOT_TIMEOUT: Time to allow for boot from power off. Among
Richard Barnette0c73ffc2012-11-19 15:21:18 -080083 # other things, this must account for the 30 second dev-mode
J. Richard Barnetted4649c62013-03-06 17:42:27 -080084 # screen delay and time to start the network.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070085 # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
Richard Barnette0c73ffc2012-11-19 15:21:18 -080086 # including the 30 second dev-mode delay and time to start the
J. Richard Barnetted4649c62013-03-06 17:42:27 -080087 # network.
Richard Barnette0c73ffc2012-11-19 15:21:18 -080088 # SHUTDOWN_TIMEOUT: Time to allow for shut down.
Chris Sosab76e0ee2013-05-22 16:55:41 -070089 # REBOOT_TIMEOUT: How long to wait for a reboot.
beepsf079cfb2013-09-18 17:49:51 -070090 # INSTALL_TIMEOUT: Time to allow for chromeos-install.
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070091
92 SLEEP_TIMEOUT = 2
J. Richard Barnetted4649c62013-03-06 17:42:27 -080093 RESUME_TIMEOUT = 10
J. Richard Barnettefbcc7122013-07-24 18:24:59 -070094 BOOT_TIMEOUT = 60
J. Richard Barnetteeb69d722012-06-18 17:29:44 -070095 USB_BOOT_TIMEOUT = 150
Dan Shi2c88eed2013-11-12 10:18:38 -080096 POWERWASH_BOOT_TIMEOUT = 60
Chris Sosab76e0ee2013-05-22 16:55:41 -070097
98 # We have a long timeout to ensure we don't flakily fail due to other
99 # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
Simran Basi1160e2c2013-10-04 16:00:24 -0700100 # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
101 # return from reboot' bug is solved.
102 REBOOT_TIMEOUT = 480
Chris Sosab76e0ee2013-05-22 16:55:41 -0700103
beepsf079cfb2013-09-18 17:49:51 -0700104 INSTALL_TIMEOUT = 240
Richard Barnette03a0c132012-11-05 12:40:35 -0800105
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800106 # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
107 # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
108 _USB_POWER_TIMEOUT = 5
109 _POWER_CYCLE_TIMEOUT = 10
110
beeps32a63082013-08-22 14:02:29 -0700111 _RPC_PROXY_URL = 'http://localhost:%d'
Christopher Wileydd181852013-10-10 19:56:58 -0700112 _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
113 _RPC_SHUTDOWN_TIMEOUT_SECONDS = 20
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800114
Richard Barnette82c35912012-11-20 10:09:10 -0800115 _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
116 'rpm_recovery_boards', type=str).split(',')
117
118 _MAX_POWER_CYCLE_ATTEMPTS = 6
119 _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
120 _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
121 'host[0-9]+')
122 _LIGHTSENSOR_FILES = ['in_illuminance0_input',
123 'in_illuminance0_raw',
124 'illuminance0_input']
125 _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
126 _LABEL_FUNCTIONS = []
Aviv Keshet74c89a92013-02-04 15:18:30 -0800127 _DETECTABLE_LABELS = []
128 label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
129 _DETECTABLE_LABELS)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700130
J. Richard Barnetteb6de7e32013-02-14 13:28:04 -0800131 # Constants used in ping_wait_up() and ping_wait_down().
132 #
133 # _PING_WAIT_COUNT is the approximate number of polling
134 # cycles to use when waiting for a host state change.
135 #
136 # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
137 # for arguments to the internal _ping_wait_for_status()
138 # method.
139 _PING_WAIT_COUNT = 40
140 _PING_STATUS_DOWN = False
141 _PING_STATUS_UP = True
142
Ismail Noorbasha07fdb612013-02-14 14:13:31 -0800143 # Allowed values for the power_method argument.
144
145 # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
146 # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
147 # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
148 POWER_CONTROL_RPM = 'RPM'
149 POWER_CONTROL_SERVO = 'servoj10'
150 POWER_CONTROL_MANUAL = 'manual'
151
152 POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
153 POWER_CONTROL_SERVO,
154 POWER_CONTROL_MANUAL)
Richard Barnette0c73ffc2012-11-19 15:21:18 -0800155
Simran Basi5e6339a2013-03-21 11:34:32 -0700156 _RPM_OUTLET_CHANGED = 'outlet_changed'
157
beeps687243d2013-07-18 15:29:27 -0700158
@staticmethod
def check_host(host, timeout=10):
    """
    Check if the given host is a chrome-os host.

    The check reads /etc/lsb-release on the host and inspects the exit
    status of that command.

    @param host: An ssh host representing a device.
    @param timeout: The timeout for the run command.

    @return: True if the command exits with status 0.  False if it exits
             with any other status, or if running it raises
             AutoservRunError or AutoservSSHTimeout (both are caught
             here rather than propagated).
    """
    probe_cmd = 'cat /etc/lsb-release > /dev/null'
    try:
        outcome = host.run(probe_cmd, timeout=timeout)
    except (error.AutoservRunError, error.AutoservSSHTimeout):
        # A failed or timed-out probe means we cannot claim this host.
        return False
    else:
        return outcome.exit_status == 0
177
178
@staticmethod
def get_servo_arguments(args_dict):
    """Pick the servo-related options out of `args_dict`.

    Returns a new dictionary holding only the 'servo_host' and
    'servo_port' entries of `args_dict` (whichever of them are
    present).  The intent is to provide standard argument processing
    from run_remote_tests for tests that require a servo to operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.
    @returns A dictionary (possibly empty, never None) with the servo
      arguments found in `args_dict`.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700204
J. Richard Barnette964fba02012-10-24 17:34:29 -0700205
def _initialize(self, hostname, servo_args=None, ssh_verbosity_flag='',
                ssh_options='',
                *args, **dargs):
    """Initialize superclasses, and |self.servo|.

    A servo object is only created when the test asks for one, which is
    signalled by `servo_args` being something other than None.  The
    ServoHost itself comes from _create_servo_host().

    @param hostname: Name of the host; forwarded to the superclass.
    @param servo_args: None when no servo is required, otherwise a
                       dictionary of servo options (it may be empty).
    @param ssh_verbosity_flag: Stored on the instance as
                               _ssh_verbosity_flag.
    @param ssh_options: Stored on the instance as _ssh_options.
    @param args: Extra positional arguments forwarded to the superclass.
    @param dargs: Extra keyword arguments forwarded to the superclass.

    """
    super(CrosHost, self)._initialize(hostname=hostname, *args, **dargs)
    # self.env is a dictionary of environment variable settings
    # to be exported for commands run on the host.
    # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
    # errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options
    self._rpc_proxy_map = {}
    self.servo = None
    # TODO(fdeng): We need to simplify the process of servo and
    # servo_host initialization.  crbug.com/298432
    self._servo_host = self._create_servo_host(servo_args)
    # TODO(fdeng): 'servo_args is not None' is used to determine whether
    # a test needs a servo. Better solution is needed.
    # servo_args can be None, an empty dictionary, or a dictionary with
    # 'servo_host' and (optionally) 'servo_port'.  We assume a test that
    # requires a servo always passes the return value of
    # get_servo_arguments(), which never returns None, so only the
    # None case means "no servo needed".
    if servo_args is None:
        return
    self.servo = self._servo_host.create_healthy_servo_object()
254
255
def _create_servo_host(self, servo_args):
    """Create a ServoHost object.

    The outcome is decided in this order:
    1) If the servo hostname derived from this host's hostname is in
       the lab zone, return a ServoHost pointing at it; servo_args is
       ignored.
    2) Otherwise, if servo_args is not None, return a ServoHost built
       from servo_args (an empty dictionary means ServoHost() is
       called with no arguments).
    3) Otherwise return None.

    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port':  9999}.
                       May be None.

    @returns: A ServoHost object or None, as described above.

    """
    lab_servo_name = servo_host.make_servo_hostname(self.hostname)
    if utils.host_is_in_lab_zone(lab_servo_name):
        return servo_host.ServoHost(servo_host=lab_servo_name)
    if servo_args is None:
        return None
    return servo_host.ServoHost(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700283
284
def get_repair_image_name(self):
    """Build the repair image name from global config and the AFE board.

    The CROS.stable_build_pattern config value is formatted with the
    board (from the AFE) and the CROS.stable_cros_version config value.

    @returns a str of $board-version/$BUILD.

    @raises AutoservError if the AFE reports no board for this DUT.

    """
    config = global_config.global_config
    version = config.get_config_value('CROS', 'stable_cros_version')
    pattern = config.get_config_value('CROS', 'stable_build_pattern')
    board = self._get_board_from_afe()
    if board is not None:
        return pattern % (board, version)
    raise error.AutoservError('DUT has no board attribute, '
                              'cannot be repaired.')
300
301
Scott Zawalski62bacae2013-03-05 10:40:32 -0500302 def _host_in_AFE(self):
303 """Check if the host is an object the AFE knows.
304
305 @returns the host object.
306 """
307 return self._AFE.get_hosts(hostname=self.hostname)
308
309
def lookup_job_repo_url(self):
    """Looks up the job_repo_url for the host.

    @returns job_repo_url from the AFE, or None if the AFE does not know
             this host or the host has no job_repo_url attribute.
    """
    # _host_in_AFE() already performs the get_hosts() query for this
    # hostname; reuse its result instead of issuing the same RPC twice.
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700323
324
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.
    """
    if not self._host_in_AFE():
        return

    affected_hosts = [self.hostname]
    version_labels = self._AFE.get_labels(
            name__startswith=ds_constants.VERSION_PREFIX,
            host__hostname=self.hostname)
    for version_label in version_labels:
        version_label.remove_hosts(hosts=affected_hosts)

    # Passing None for both arguments resets job_repo_url to None.
    self.update_job_repo_url(None, None)
339
340
def update_job_repo_url(self, devserver_url, image_name):
    """
    Updates the job_repo_url host attribute and asserts its value.

    When either argument is empty or None the attribute is set to None.
    After writing, the attribute is read back and compared against the
    value that was written.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    stored_url = self.lookup_job_repo_url()
    if stored_url != repo_url:
        raise error.AutoservError('Failed to update job_repo_url with %s, '
                                  'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500358
359
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise the
    version label for `image_name` is looked up (and created if it does
    not exist yet), this host is added to it, and job_repo_url is
    updated to point at the devserver resolved for the image.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    version_label_name = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing = self._AFE.get_labels(name=version_label_name)
    label = (existing[0] if existing
             else self._AFE.create_label(name=version_label_name))

    label.add_hosts([self.hostname])
    self.update_job_repo_url(devserver_url, image_name)
381
382
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0.  This method asks
    the devserver embedded in that url to stage the 'autotest' artifact
    for the build, and reports how long the staging call took to the
    'verify_job_repo_url' stats gauge.

    If the host has no job_repo_url a warning is logged and nothing else
    happens.  If the build name cannot be parsed, the artifact is still
    staged but no timing is reported.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server job.
                Not used by this method.

    NOTE(review): errors from the devserver calls are not handled here
    and propagate to the caller (presumably DevServerException or
    urllib2.URLError -- confirm against dev_server).
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
        job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    start_time = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - start_time

    # Record how much of the verification time comes from a devserver
    # restage. If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = site_utils.ParseBuildName(
            image_name)[:3]
    except site_utils.ParseBuildNameException:
        # Stats are best effort; an unparsable build name just means
        # there is nothing to report.
        return

    # Reduce "scheme://host[:port][/path]" to the bare host.  Splitting
    # (rather than slicing between the first '/' and the last ':') keeps
    # the name intact when the url carries no port or no scheme.
    netloc = devserver_url.split('//', 1)[-1].split('/', 1)[0]
    devserver = netloc.rsplit(':', 1)[0]
    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        'devserver': devserver.replace('.', '_'),
    }
    stats.Gauge('verify_job_repo_url').send(
        '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
        stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700444
Scott Zawalskieadbf702013-03-14 09:23:06 -0400445
Dan Shi0f466e82013-02-22 15:44:58 -0800446 def _try_stateful_update(self, update_url, force_update, updater):
447 """Try to use stateful update to initialize DUT.
448
449 When DUT is already running the same version that machine_install
450 tries to install, stateful update is a much faster way to clean up
451 the DUT for testing, compared to a full reimage. It is implemeted
452 by calling autoupdater.run_update, but skipping updating root, as
453 updating the kernel is time consuming and not necessary.
454
455 @param update_url: url of the image.
456 @param force_update: Set to True to update the image even if the DUT
457 is running the same version.
458 @param updater: ChromiumOSUpdater instance used to update the DUT.
459 @returns: True if the DUT was updated with stateful update.
460
461 """
462 if not updater.check_version():
463 return False
464 if not force_update:
465 logging.info('Canceling stateful update because the new and '
466 'old versions are the same.')
467 return False
468 # Following folders should be rebuilt after stateful update.
469 # A test file is used to confirm each folder gets rebuilt after
470 # the stateful update.
471 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
472 test_file = '.test_file_to_be_deleted'
473 for folder in folders_to_check:
474 touch_path = os.path.join(folder, test_file)
475 self.run('touch %s' % touch_path)
476
477 if not updater.run_update(force_update=True, update_root=False):
478 return False
479
480 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700481 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800482 check_file_cmd = 'test -f %s; echo $?'
483 for folder in folders_to_check:
484 test_file_path = os.path.join(folder, test_file)
485 result = self.run(check_file_cmd % test_file_path,
486 ignore_status=True)
487 if result.exit_status == 1:
488 return False
489 return True
490
491
J. Richard Barnette7275b612013-06-04 18:13:11 -0700492 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800493 """After the DUT is updated, confirm machine_install succeeded.
494
495 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700496 @param expected_kernel: kernel expected to be active after reboot,
497 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800498
499 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700500 # Touch the lab machine file to leave a marker that
501 # distinguishes this image from other test images.
502 # Afterwards, we must re-run the autoreboot script because
503 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800504 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800505 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700506 updater.verify_boot_expectations(
507 expected_kernel, rollback_message=
508 'Build %s failed to boot on %s; system rolled back to previous'
509 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700510 # Check that we've got the build we meant to install.
511 if not updater.check_version_to_confirm_install():
512 raise autoupdater.ChromiumOSError(
513 'Failed to update %s to build %s; found build '
514 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700515 updater.update_version,
516 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800517
518
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    When no image name is given, the repair image name from
    get_repair_image_name() is used.  The download is triggered with
    synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)
    image_server = dev_server.ImageServer.resolve(build)
    image_server.trigger_download(build, synchronous=False)
    url_pattern = tools.image_url_pattern()
    return url_pattern % (image_server.url(), build)
532
533
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    When no image name is given, the repair image name from
    get_repair_image_name() is used.  The 'test_image' artifact is
    staged on the devserver resolved for the build.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns the URL reported by the devserver's get_test_image_url()
        for the build.
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)
    image_server = dev_server.ImageServer.resolve(build)
    image_server.stage_artifacts(build, ['test_image'])
    return image_server.get_test_image_url(build)
547
548
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory image on a devserver and return its URL.

    The name of the artifact to stage is read from the
    CROS.factory_artifact config value.

    @param image_name: a name like <board>/4262.204.0

    @return: A URL for the staged factory image, eg:
        http://<devserver>/static/canary-channel/<board>/4262.204.0/
        factory_test/chromiumos_factory_image.bin
        None (after logging an error) when image_name is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return

    config = global_config.global_config
    artifact_name = config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not artifact_name:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    image_server = dev_server.ImageServer.resolve(image_name)
    artifacts = [artifact_name]
    image_server.stage_artifacts(image_name, artifacts, archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (image_server.url(), image_name)
581
582
Chris Sosaa3ac2152012-05-23 22:23:13 -0700583 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500584 local_devserver=False, repair=False):
585 """Install the DUT.
586
Dan Shi0f466e82013-02-22 15:44:58 -0800587 Use stateful update if the DUT is already running the same build.
588 Stateful update does not update kernel and tends to run much faster
589 than a full reimage. If the DUT is running a different build, or it
590 failed to do a stateful update, full update, including kernel update,
591 will be applied to the DUT.
592
Scott Zawalskieadbf702013-03-14 09:23:06 -0400593 Once a host enters machine_install its cros_version label will be
594 removed as well as its host attribute job_repo_url (used for
595 package install).
596
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500597 @param update_url: The url to use for the update
598 pattern: http://$devserver:###/update/$build
599 If update_url is None and repair is True we will install the
600 stable image listed in global_config under
601 CROS.stable_cros_version.
602 @param force_update: Force an update even if the version installed
603 is the same. Default:False
604 @param local_devserver: Used by run_remote_test to allow people to
605 use their local devserver. Default: False
606 @param repair: Whether or not we are in repair mode. This adds special
607 cases for repairing a machine like starting update_engine.
608 Setting repair to True sets force_update to True as well.
609 default: False
610 @raises autoupdater.ChromiumOSError
611
612 """
Dan Shi7458bf62013-06-10 12:50:16 -0700613 if update_url:
614 logging.debug('update url is set to %s', update_url)
615 else:
616 logging.debug('update url is not set, resolving...')
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700617 if self._parser.options.image:
618 requested_build = self._parser.options.image
619 if requested_build.startswith('http://'):
620 update_url = requested_build
Dan Shi7458bf62013-06-10 12:50:16 -0700621 logging.debug('update url is retrieved from requested_build'
622 ': %s', update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700623 else:
624 # Try to stage any build that does not start with
625 # http:// on the devservers defined in
626 # global_config.ini.
Dan Shi7458bf62013-06-10 12:50:16 -0700627 update_url = self._stage_image_for_update(requested_build)
628 logging.debug('Build staged, and update_url is set to: %s',
629 update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700630 elif repair:
631 update_url = self._stage_image_for_update()
Dan Shi7458bf62013-06-10 12:50:16 -0700632 logging.debug('Build staged, and update_url is set to: %s',
633 update_url)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400634 else:
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700635 raise autoupdater.ChromiumOSError(
636 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500637
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500638 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800639 # In case the system is in a bad state, we always reboot the machine
640 # before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700641 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500642 self.run('stop update-engine; start update-engine')
643 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800644
Chris Sosaa3ac2152012-05-23 22:23:13 -0700645 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Chris Sosa72312602013-04-16 15:01:56 -0700646 local_devserver=local_devserver)
Dan Shi0f466e82013-02-22 15:44:58 -0800647 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400648 # Remove cros-version and job_repo_url host attribute from host.
649 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800650 # If the DUT is already running the same build, try stateful update
651 # first. Stateful update does not update kernel and tends to run much
652 # faster than a full reimage.
653 try:
Chris Sosab76e0ee2013-05-22 16:55:41 -0700654 updated = self._try_stateful_update(
655 update_url, force_update, updater)
Dan Shi0f466e82013-02-22 15:44:58 -0800656 if updated:
657 logging.info('DUT is updated with stateful update.')
658 except Exception as e:
659 logging.exception(e)
660 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700661
Dan Shi0f466e82013-02-22 15:44:58 -0800662 inactive_kernel = None
663 # Do a full update if stateful update is not applicable or failed.
664 if not updated:
665 # In case the system is in a bad state, we always reboot the
666 # machine before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700667 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Chris Sosab7612bc2013-03-21 10:32:37 -0700668
669 # TODO(sosa): Remove temporary hack to get rid of bricked machines
670 # that can't update due to a corrupted policy.
671 self.run('rm -rf /var/lib/whitelist')
672 self.run('touch /var/lib/whitelist')
673 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400674 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700675
Dan Shi0f466e82013-02-22 15:44:58 -0800676 if updater.run_update(force_update):
677 updated = True
678 # Figure out active and inactive kernel.
679 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700680
Dan Shi0f466e82013-02-22 15:44:58 -0800681 # Ensure inactive kernel has higher priority than active.
682 if (updater.get_kernel_priority(inactive_kernel)
683 < updater.get_kernel_priority(active_kernel)):
684 raise autoupdater.ChromiumOSError(
685 'Update failed. The priority of the inactive kernel'
686 ' partition is less than that of the active kernel'
687 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700688
Dan Shi0f466e82013-02-22 15:44:58 -0800689 # Updater has returned successfully; reboot the host.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700690 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700691
Dan Shi0f466e82013-02-22 15:44:58 -0800692 if updated:
693 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400694 image_name = autoupdater.url_to_image_name(update_url)
Dan Shie9309262013-06-19 22:50:21 -0700695 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800696
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700697 # Clean up any old autotest directories which may be lying around.
698 for path in global_config.global_config.get_config_value(
699 'AUTOSERV', 'client_autodir_paths', type=list):
700 self.run('rm -rf ' + path)
701
702
def show_update_engine_log(self):
    """Dump the update engine log of the DUT.

    Runs `cat` on constants.UPDATE_ENGINE_LOG through self.run().
    """
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
707
708
def _get_board_from_afe(self):
    """Look up this host's board from its labels in the AFE.

    The lookup is delegated to server_utils.get_board_from_afe().  The
    label of interest has the form "board:<board>"; only the "<board>"
    part is returned.

    @returns The board taken from the label, or `None` if there is not
             a single, unique label matching the pattern.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800719
720
def get_build(self):
    """Look up the current build of this host in the AFE.

    The lookup is delegated to server_utils.get_build_from_afe(), which
    inspects the labels assigned to this host.

    @returns The current build, or None if it could not be found or if
             multiple build labels are assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800730
731
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    If the host is up, re-install the DUT by calling
    machine_install(repair=True), which installs the stable "repair"
    version of Chrome OS defined in the global_config under
    CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the failure details, then let the caller see the
        # original exception.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500753
754
def _install_repair_with_powerwash(self):
    """Powerwash the DUT, then repair it using update-engine.

    update-engine may fail due to a bad image. In such case, powerwash
    may help to cleanup the DUT for update-engine to work again.  The
    powerwash is requested by writing "fast safe" to
    /mnt/stateful_partition/factory_install_reset and rebooting; once
    the DUT is back up, _install_repair() does the actual install.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises AutoservRepairFailure if the DUT does not come back after
            the powerwash reboot.
    @raises ChromiumOSError if the install failed for some reason.

    """
    if not self.is_up():
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to powerwash the DUT.')
    powerwash_request = ('echo "fast safe" > '
                         '/mnt/stateful_partition/factory_install_reset')
    self.run(powerwash_request)
    self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)

    if self.is_up():
        logging.info('Powerwash succeeded.')
        self._install_repair()
        return

    logging.error('Powerwash failed. DUT did not come back after '
                  'reboot.')
    raise error.AutoservRepairFailure(
            'DUT failed to boot from powerwash after %d seconds' %
            self.POWERWASH_BOOT_TIMEOUT)
781
782
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """Re-install the OS on the DUT through its servo.

    The re-install proceeds by:
    1) installing a test image on a USB storage device attached to the
       Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    Afterwards the DUT is power cycled through the servo and has to come
    back up within self.BOOT_TIMEOUT seconds.  The USB boot and the
    install are each timed with a stats.Timer whose key includes the
    timeout that was used.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the
            chromeos image. Factory images need a longer
            install_timeout.

    @raises AutoservRepairFailure if the DUT does not boot from USB
            within usb_boot_timeout seconds.
    @raises AutoservError if the DUT does not come back up after the
            image was installed.
    """
    boot_stat_key = ('servo_install.usb_boot_timeout_%s'
                     % usb_boot_timeout)
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = stats.Timer(boot_stat_key)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    install_stat_key = ('servo_install.install_timeout_%s'
                        % install_timeout)
    install_timer = stats.Timer(install_stat_key)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes', timeout=install_timeout)
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    self.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    if self.wait_up(timeout=self.BOOT_TIMEOUT):
        return
    raise error.AutoservError('DUT failed to reboot installed '
                              'test image after %d seconds' %
                              self.BOOT_TIMEOUT)
836
837
def _servo_repair_reinstall(self):
    """Reinstall the DUT utilizing servo and a test image.

    Stages an image with stage_image_for_servo() and hands its url to
    servo_install(), which re-installs the OS on the DUT by:
    1) installing a test image on a USB storage device attached to the
       Servo board,
    2) booting that image in recovery mode, and then
    3) installing the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    self.servo_install(self.stage_image_for_servo())
860
861
def _servo_repair_power(self):
    """Attempt to repair DUT using an attached Servo.

    Powers the DUT off and back on through the servo's power state
    controller, then waits up to self.BOOT_TIMEOUT for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    # The controller is fetched anew for each operation.
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()

    if not self.wait_up(self.BOOT_TIMEOUT):
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800883
884
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power cycled and then given self.BOOT_TIMEOUT seconds
    to come up.  If it does not, it is power cycled again, relying on
    auto fallback to the last good install, until it has failed to come
    up self._MAX_POWER_CYCLE_ATTEMPTS times.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host is still down after the
            maximum number of power cycle attempts.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    self.power_cycle()
    while True:
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))
        self.power_cycle()

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
915
916
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If a servo host is attached, a healthy servo object is requested
    first.  If that fails the error is logged, `self.servo` is set to
    None and the repair continues without servo.

    The following procedures are then attempted in order.  After each
    one `self.verify()` is called, and the first procedure for which
    verification passes ends the repair:
      1. Re-install to a known stable image using auto-update.
      2. Powerwash the DUT, then re-install using auto-update.
      3. If there's a servo for the DUT, power the DUT off and on.
      4. If there's a servo for the DUT, re-install via the servo.
      5. If the DUT can be power-cycled via RPM, repair by
         power-cycling.

    Every outcome is counted in a stats.Counter named after the repair
    function, plus a per-board counter when the board is known.

    @raises AutoservRepairTotalFailure if every procedure failed or was
            not applicable; the message lists the collected errors.

    """
    if self._servo_host:
        try:
            self.servo = self._servo_host.create_healthy_servo_object()
        except Exception as e:
            self.servo = None
            logging.error('Could not create a healthy servo: %s', e)

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._install_repair,
                    self._install_repair_with_powerwash,
                    self._servo_repair_power,
                    self._servo_repair_reinstall,
                    self._powercycle_to_repair]
    errors = []
    board = self._get_board_from_afe()

    def _bump(stat_name):
        """Increment `stat_name` and, if the board is known, the
        matching `<stat_name>.<board>` counter."""
        stats.Counter(stat_name).increment()
        if board:
            stats.Counter('%s.%s' % (stat_name, board)).increment()

    for repair_func in repair_funcs:
        try:
            repair_func()
            self.verify()
            _bump('%s.SUCCEEDED' % repair_func.__name__)
            return
        except error.AutoservRepairMethodNA as e:
            # The method does not apply to this DUT; try the next one.
            _bump('%s.RepairNA' % repair_func.__name__)
            logging.warning('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            # Either the repair itself or the verification failed.
            _bump('%s.FAILED' % repair_func.__name__)
            logging.warning('Failed to repair device: %s', e)
            errors.append(str(e))

    _bump('Full_Repair_Failed')
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -08001005
1006
def close(self):
    """Close this host.

    All rpc connections are disconnected through rpc_disconnect_all()
    before the parent class close() is run.
    """
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001010
1011
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Nothing is done unless the AFE knows this host and the host carries
    the self._RPM_OUTLET_CHANGED attribute.  Otherwise the host is
    powered on and the attribute is reset to None in the AFE; a failure
    to power on is logged, not raised.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    hosts = afe.get_hosts(hostname=self.hostname)
    if not hosts:
        return
    if self._RPM_OUTLET_CHANGED not in hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
1030
1031
def _is_factory_image(self):
    """Check whether the image on the DUT is a factory image.

    The check tests for the file /root/.factory_test on the DUT; a
    failing test command is not treated as an error.

    @return: True if the image on the DUT is a factory image.
             False otherwise.
    """
    probe = self.run('[ -f /root/.factory_test ]', ignore_status=True)
    marker_found = (probe.exit_status == 0)
    return marker_found
1040
1041
def _restart_ui(self):
    """Restart the ui job on the DUT.

    The login prompt state is cleared on the client before `restart ui`
    is run, and the client then waits for the login prompt.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    client_at.run_static_method(cros_ui_module,
                                '_clear_login_prompt_state')
    self.run('restart ui')
    client_at.run_static_method(cros_ui_module,
                                '_wait_for_login_prompt')
1060
1061
def cleanup(self):
    """Clean up the DUT.

    Removes constants.CLEANUP_LOGS_PAUSED_FILE and restarts the ui.  If
    the ui cannot be restarted, the parent class cleanup() is run
    instead.  Hosts with RPM power support then get their outlet state
    checked through _cleanup_poweron().
    """
    remove_paused_flag = 'rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE
    self.run(remove_paused_flag)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warning('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001075
1076
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    @param dargs: Keyword arguments handed on to the parent reboot().
            `reboot_cmd` and `fastsync` get Chrome OS specific defaults
            when the caller does not supply them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001092
1093
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. update_engine answers a simple status request (skipped on
        factory images).
     5. python runs on the DUT and can import cPickle.

    The space requirements come from the SERVER section of the global
    config, defaulting to 20.0 GB and 0.1 GB respectively.

    """
    super(CrosHost, self).verify_software()

    config = global_config.global_config
    # (path, config key holding the required GB, default required GB)
    diskspace_checks = (
            ('/mnt/stateful_partition',
             'gb_diskspace_required', 20.0),
            ('/mnt/stateful_partition/encrypted',
             'gb_encrypted_diskspace_required', 0.1),
    )
    for path, config_key, default_gb in diskspace_checks:
        required_gb = config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(path, required_gb)

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001125
1126
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command line, without the target host name.
    """
    template = ' '.join((
            '/usr/bin/ssh -a -x %s %s %s',
            '-o StrictHostKeyChecking=no',
            '-o UserKnownHostsFile=/dev/null -o BatchMode=yes',
            '-o ConnectTimeout=30 -o ServerAliveInterval=180',
            '-o ServerAliveCountMax=3 -o ConnectionAttempts=4',
            '-o Protocol=2 -l %s -p %d'))
    fields = (self._ssh_verbosity_flag, self._ssh_options, opts, user, port)
    return template % fields
Fang Deng96667ca2013-08-01 17:46:18 -07001167
1168
def _create_ssh_tunnel(self, port, local_port):
    """Start an ssh process forwarding local_port to port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process (a subprocess.Popen object).
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    base_ssh = self.make_ssh_command(opts=forwarding_opts)
    full_cmd = '%s %s' % (base_ssh, self.hostname)
    logging.debug('Full tunnel command: %s', full_cmd)
    proc = subprocess.Popen(full_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d remote = %d, pid = %d',
                  local_port, port, proc.pid)
    return proc
1191
1192
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, if known.  It
                       is stored with the tunnel in self._rpc_proxy_map.

    @return A url that we can use to initiate the rpc connection.
    """
    # Drop whatever is currently registered for this remote port.
    self.rpc_disconnect(port)
    forwarding_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarding_port)
    proxy_entry = (command_name, tunnel, remote_pid)
    self._rpc_proxy_map[port] = proxy_entry
    return self._RPC_PROXY_URL % forwarding_port
1228
1229
Christopher Wileyd78249a2013-03-01 13:05:31 -08001230 def xmlrpc_connect(self, command, port, command_name=None,
1231 ready_test_name=None, timeout_seconds=10):
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001232 """Connect to an XMLRPC server on the host.
1233
1234 The `command` argument should be a simple shell command that
1235 starts an XMLRPC server on the given `port`. The command
1236 must not daemonize, and must terminate cleanly on SIGTERM.
1237 The command is started in the background on the host, and a
1238 local XMLRPC client for the server is created and returned
1239 to the caller.
1240
1241 Note that the process of creating an XMLRPC client makes no
1242 attempt to connect to the remote server; the caller is
1243 responsible for determining whether the server is running
1244 correctly, and is ready to serve requests.
1245
Christopher Wileyd78249a2013-03-01 13:05:31 -08001246 Optionally, the caller can pass ready_test_name, a string
1247 containing the name of a method to call on the proxy. This
1248 method should take no parameters and return successfully only
1249 when the server is ready to process client requests. When
1250 ready_test_name is set, xmlrpc_connect will block until the
1251 proxy is ready, and throw a TestError if the server isn't
1252 ready by timeout_seconds.
1253
beeps32a63082013-08-22 14:02:29 -07001254 If a server is already running on the remote port, this
1255 method will kill it and disconnect the tunnel process
1256 associated with the connection before establishing a new one,
1257 by consulting the rpc_proxy_map in rpc_disconnect.
1258
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001259 @param command Shell command to start the server.
1260 @param port Port number on which the server is expected to
1261 be serving.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001262 @param command_name String to use as input to `pkill` to
1263 terminate the XMLRPC server on the host.
Christopher Wileyd78249a2013-03-01 13:05:31 -08001264 @param ready_test_name String containing the name of a
1265 method defined on the XMLRPC server.
1266 @param timeout_seconds Number of seconds to wait
1267 for the server to become 'ready.' Will throw a
1268 TestFail error if server is not ready in time.
1269
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001270 """
Christopher Wileyc14f06a2013-10-16 13:55:39 -07001271 # Clean up any existing state. If the caller is willing
1272 # to believe their server is down, we ought to clean up
1273 # any tunnels we might have sitting around.
1274 self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001275 # Start the server on the host. Redirection in the command
1276 # below is necessary, because 'ssh' won't terminate until
1277 # background child processes close stdin, stdout, and
1278 # stderr.
Christopher Wileydd181852013-10-10 19:56:58 -07001279 remote_cmd = '%s </dev/null >/dev/null 2>&1 & echo $!' % command
1280 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001281 logging.debug('Started XMLRPC server on host %s, pid = %s',
1282 self.hostname, remote_pid)
1283
Christopher Wileydd181852013-10-10 19:56:58 -07001284 # Tunnel through SSH to be able to reach that remote port.
1285 rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
Christopher Wileyd78249a2013-03-01 13:05:31 -08001286 proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
Christopher Wileydd181852013-10-10 19:56:58 -07001287
Christopher Wileyd78249a2013-03-01 13:05:31 -08001288 if ready_test_name is not None:
J. Richard Barnette13eb7c02013-03-07 12:06:29 -08001289 # retry.retry logs each attempt; calculate delay_sec to
1290 # keep log spam to a dull roar.
Christopher Wiley0ed712b2013-04-09 15:25:12 -07001291 @retry.retry((socket.error,
1292 xmlrpclib.ProtocolError,
1293 httplib.BadStatusLine),
Chris Sosa65425082013-10-16 13:26:22 -07001294 timeout_min=timeout_seconds / 60.0,
1295 delay_sec=min(max(timeout_seconds / 20.0, 0.1), 1))
Christopher Wileyd78249a2013-03-01 13:05:31 -08001296 def ready_test():
1297 """ Call proxy.ready_test_name(). """
1298 getattr(proxy, ready_test_name)()
1299 successful = False
1300 try:
1301 logging.info('Waiting %d seconds for XMLRPC server '
1302 'to start.', timeout_seconds)
1303 ready_test()
1304 successful = True
Christopher Wileyd78249a2013-03-01 13:05:31 -08001305 finally:
1306 if not successful:
1307 logging.error('Failed to start XMLRPC server.')
beeps32a63082013-08-22 14:02:29 -07001308 self.rpc_disconnect(port)
Christopher Wileyd78249a2013-03-01 13:05:31 -08001309 logging.info('XMLRPC server started successfully.')
1310 return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001311
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001312
def syslog(self, message, tag='autotest'):
    """Log a message to syslog on the host via the `logger` command.

    Both `tag` and `message` are single-quoted for the remote shell,
    so embedded double quotes, `$`, backticks and backslashes are
    logged literally instead of being interpreted by the shell (or
    breaking the command line).

    @param message String message to log into syslog.
    @param tag String tag prefix for syslog.

    """
    def _shell_quote(text):
        """Wrap `text` in single quotes for a POSIX shell."""
        # A single quote cannot appear inside a single-quoted string,
        # so close the string, emit an escaped quote, and reopen it.
        return "'%s'" % ('%s' % (text,)).replace("'", "'\\''")

    self.run('logger -t %s %s' % (_shell_quote(tag), _shell_quote(message)))
1321
1322
def jsonrpc_connect(self, port):
    """Create a jsonrpc proxy that talks to the host through a tunnel.

    This exists to talk to goofy (the default system manager on
    factory images), so checking that the remote rpc server is sane
    is left to the caller. Unlike xmlrpc_connect, no remote server is
    started here: the only clients are factory tests, for which goofy
    is built into the image and starts when the target boots.

    Several proxies could in theory be forwarded to the same remote
    port, but that risks leaking an existing tunnel process.
    NOTE(review): disconnecting an older tunnel is presumably done
    inside _setup_rpc (via rpc_disconnect); this method does not do it
    itself - confirm.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None when jsonrpclib is unavailable.
    """
    if jsonrpclib:
        # No command name is passed, so there is no remote server
        # process for rpc_disconnect to pkill later.
        rpc_url = self._setup_rpc(port, None)
        client = jsonrpclib.jsonrpc.ServerProxy(rpc_url)
        logging.info('Established a jsonrpc connection through port %s.',
                     port)
        return client

    logging.warning('Jsonrpclib could not be imported. Check that '
                    'site-packages contains jsonrpclib.')
    return None
1353
1354
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`. Also closes the local ssh tunnel created
    for the connection to the host. This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    The local tunnel is terminated and the port is forgotten even
    when stopping the remote server fails, so a failed shutdown
    does not leak the tunnel process.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    @exception TestError The remote server was still running after
                         _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    if port not in self._rpc_proxy_map:
        return
    remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
    try:
        if remote_name:
            # We use 'pkill' to find our target process rather than
            # a PID, because the host may have rebooted since
            # connecting, and we don't want to kill an innocent
            # process with the same PID.
            #
            # 'pkill' helpfully exits with status 1 if no target
            # process is found, for which run() will throw an
            # exception.  We don't want that, so we ignore the
            # status.
            self.run("pkill -f '%s'" % remote_name, ignore_status=True)
            if remote_pid:
                logging.info('Waiting for RPC server "%s" shutdown',
                             remote_name)
                start_time = time.time()
                # Poll until the pid recorded at startup is no longer
                # among the processes matching the server name.
                while (time.time() - start_time <
                       self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                    running_processes = self.run(
                            "pgrep -f '%s'" % remote_name,
                            ignore_status=True).stdout.split()
                    if remote_pid not in running_processes:
                        logging.info('Shut down RPC server.')
                        break
                    time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
                else:
                    # The loop ran out of time without seeing the
                    # server exit.
                    raise error.TestError('Failed to shutdown RPC server %s' %
                                          remote_name)
    finally:
        # Release local resources whether or not the remote shutdown
        # succeeded; otherwise an exception above would leave the
        # tunnel process running and the stale entry in the map.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)
        del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001409
1410
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() deletes entries from _rpc_proxy_map, so walk
    # an explicit snapshot of the ports rather than relying on
    # keys() returning a copy.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001415
1416
def _ping_check_status(self, status):
    """Ping the host a single time and compare against `status`.

    @param status The ping status the caller is hoping to see.
    @return True iff `status` agrees with the outcome of the ping
            (i.e. both True or both False).

    """
    exit_code = utils.ping(self.hostname, tries=1, deadline=1)
    # A result of 0 from utils.ping is treated as "host answered".
    host_answered = (exit_code == 0)
    mismatch = status ^ host_answered
    return not mismatch
1427
def _ping_wait_for_status(self, status, timeout):
    """Poll with `ping` until the host reaches a status (UP or DOWN).

    Polling lasts no longer than `timeout` seconds. The interval
    between polls is sized so that roughly _PING_WAIT_COUNT polling
    cycles fit in the timeout, capped at about one minute.

    @param status Stop waiting as soon as `ping` of the host
                  reports this status.
    @param timeout Poll for at most this many seconds.
    @return True iff the host status from `ping` matched the
            requested status at the time of return.

    """
    # Each _ping_check_status() call takes about 1 second, which is
    # why 1 is subtracted from the computed interval.
    pause = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
    deadline = time.time() + timeout
    while True:
        if time.time() > deadline:
            # We most recently slept for `pause` seconds, so the last
            # ping may be stale. Probe one final time and report
            # whatever that says.
            return self._ping_check_status(status)
        if self._ping_check_status(status):
            return True
        if pause > 0:
            time.sleep(pause)
1458
def ping_wait_up(self, timeout):
    """Block until the host answers `ping`, or the timeout expires.

    N.B. This is not a reliable replacement for `wait_up()`: a host
    that answers ping does not necessarily answer ssh. Use it only
    when the target DUT can be considered functional even though it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001474
def ping_wait_down(self, timeout):
    """Block until the host stops answering `ping`, or time runs out.

    This can serve as a slightly faster version of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001488
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    "Asleep" is detected by the host no longer answering ping, which
    looks the same as a system that is powered off; to confirm that
    the unit really was asleep, force a resume and then call
    `test_wait_for_resume()`.

    A test is expected to call this as part of a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_quiet = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_quiet:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001515
1516
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned before
    the host entered sleep mode. A `TestFail` exception is raised if
    the boot id has changed once the host is back.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    came_back = self.wait_up(timeout=self.RESUME_TIMEOUT)
    if not came_back:
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # A different boot id means the host rebooted instead of resuming.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, current_boot_id))
1547
1548
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    "Shut down" is detected by the host no longer answering ping,
    which looks the same as a system that is merely asleep; to
    confirm that the unit really was down, force a boot and then
    call test_wait_for_boot().

    A test is expected to call this as part of a sequence like the
    following:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    went_quiet = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_quiet:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001574
1575
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned before
    shutting down. A `TestFail` exception is raised if the boot id
    is unchanged once the host is back. The boot id comparison is
    skipped when `old_boot_id` is not specified.

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    came_up = self.wait_up(timeout=self.REBOOT_TIMEOUT)
    if not came_up:
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    # The boot id is only queried when the caller supplied one to
    # compare against.
    if old_boot_id and self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001605
1606
@staticmethod
def check_for_rpm_support(hostname):
    """Report whether a hostname looks like an RPM-powered DUT.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not follow the naming format
            defined for RPM powered DUT's in the lab. If it does
            follow the format, the regular expression MatchObject
            is returned instead.
    """
    naming_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(naming_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001618
1619
def has_power(self):
    """Report whether this host is powered by an RPM.

    @return The (truthy) match object from check_for_rpm_support()
            when this host's name follows the RPM naming format,
            or None when it does not.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07001627
1628
def _set_power(self, state, power_method):
    """Set the power state of the host via RPM, Servo or manually.

    @param state Power state to apply to the DUT; 'ON' or 'OFF',
                 in any letter case.
    @param power_method Which method of power control to use.
                        POWER_CONTROL_SERVO and POWER_CONTROL_MANUAL
                        are handled specially; any other value is
                        treated as RPM.

    @exception TestError `state` is not one of the accepted states.
    @exception TestFail RPM control was requested, but has_power()
                        says the DUT has no RPM.

    """
    valid_states = ['ON', 'OFF']
    if state.upper() not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        # Pause after switching the servo-controlled power.
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # Nothing is switched here; a person gets a fixed window to
        # change the AC power by hand.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Everything else goes through the RPM.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    # Record on the AFE that this host's outlet was touched before
    # asking the RPM to change it.
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07001659
1660
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Cut power to this host via RPM, Servo or manual.

    @param power_method Which method of power control to use.
                        By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001670
1671
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Restore power to this host via RPM, Servo or manual.

    @param power_method Which method of power control to use.
                        By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1681
1682
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For Servo and manual control the cycle is an explicit
    power_off(), a wait of _POWER_CYCLE_TIMEOUT seconds, and a
    power_on(). Any other method sends a single 'CYCLE' request to
    the RPM.

    @param power_method Which method of power control to use.
                        By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001698
1699
def get_platform(self):
    """Work out the platform label for this host from its fwid.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally looks like {platform}.{firmware version}, e.g.
    # Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the part before the first
    # dot is the platform id.
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.')[0].lower()
    # Newer platforms carry a 'Google_' prefix that older ones lack;
    # drop it so both forms yield the same label.
    return platform_id.replace('google_', '')
1713
1714
@label_decorator()
def get_board(self):
    """Work out the board label for this host from /etc/lsb-release.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, but
    # development devices report {board_name}-signed-{key_type}.
    # Keep only the first dash-separated field, or the first two when
    # the name contains 'x86' (e.g. an 'x86-' prefix that must stay).
    fields = board.split('-')
    fields_to_keep = 2 if 'x86' in board else 1
    return label_format % '-'.join(fields[:fields_to_keep])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001731
1732
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Work out whether this host should carry the lightsensor label.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # The search follows symlinks. stderr_tee is None because a
        # symlink loop only causes a few messages on stderr; the
        # command still executes correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1: none of the candidate
        # lightsensor files exist.
        return None
    return 'lightsensor'
1752
1753
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Work out whether this host should carry the bluetooth label.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # `test` exited with a return code of 1: the directory does
        # not exist.
        return None
    # `test` exited with a return code of 0.
    return 'bluetooth'
1769
1770
@label_decorator('graphics')
def get_graphics(self):
    """
    Work out the graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    # Only `uname -a` output is inspected, never a graphics library.
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
1786
1787
def get_labels(self):
    """Collect the automatic labels that apply to this host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in _LABEL_FUNCTIONS is called with this host,
    and each truthy result is kept, in order.

    @returns a list of label strings.
    """
    candidates = (detect(self) for detect in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]