blob: 70c54126be8bdd40af15cd9a24c2c5b804adbdc2 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Fang Deng96667ca2013-08-01 17:46:18 -070028from autotest_lib.server.hosts import abstract_ssh
Fang Deng5d518f42013-08-02 14:04:32 -070029from autotest_lib.server.hosts import servo_host
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
beeps32a63082013-08-22 14:02:29 -070034try:
35 import jsonrpclib
36except ImportError:
37 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070038
Fang Dengd1c2b732013-08-20 12:59:46 -070039
class FactoryImageCheckerException(error.AutoservError):
    """Raised to signal that an image turned out to be a factory image.

    Derives from error.AutoservError, so handlers written for that base
    class also catch this exception.
    """
43
44
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers a function as a label detector.

    The returned decorator appends the decorated function to
    `label_function_list` and hands the function back unchanged.  When
    both a truthy `label` and a `label_list` are supplied, `label` is
    appended to `label_list` as well.

    @param label_function_list: List that collects the label detecting
                                functions.
    @param label_list: List that collects the detectable label strings,
                       or None to skip label bookkeeping.
                       (Default: None)
    @param label: Label string that the decorated function can detect.
                  (Default: None)

    @returns A decorator taking a single function argument.
    """
    def register(func):
        """Record `func` (and the label, if any) and return `func`.

        @param func: The function to be added as a detector.
        """
        label_function_list.append(func)
        # Nothing more to record without both a label and a list for it.
        if label_list is None or not label:
            return func
        label_list.append(label)
        return func

    return register
63
64
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Seconds to wait, after an auto update, for the new kernel to be
    # marked successful.
    _KERNEL_UPDATE_TIMEOUT = 120

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # Rule of thumb: a timeout may be up to twice the typical value
    # measured on the slowest platform.  The numbers below have not
    # necessarily been checked empirically against that rule.
    #
    #   SLEEP_TIMEOUT:    Time to allow for suspend to memory.
    #   RESUME_TIMEOUT:   Time to allow for resume after suspend, plus
    #                     time to restart the network.
    #   BOOT_TIMEOUT:     Time to allow for boot from power off.  Among
    #                     other things, this must account for the 30
    #                     second dev-mode screen delay and time to start
    #                     the network.
    #   USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #                     including the 30 second dev-mode delay and
    #                     time to start the network.
    #   REBOOT_TIMEOUT:   How long to wait for a reboot.
    #   INSTALL_TIMEOUT:  Time to allow for chromeos-install.
    #
    # NOTE(review): earlier comments also described a SHUTDOWN_TIMEOUT
    # ("time to allow for shut down"), but no such constant is defined
    # in this part of the class.
    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150

    # Kept long on purpose so that unrelated issues do not cause flaky
    # failures.  Shorter timeouts are vetted in
    # platform_RebootAfterUpdate.
    REBOOT_TIMEOUT = 300

    INSTALL_TIMEOUT = 240

    #   _USB_POWER_TIMEOUT:   Time to allow for USB to power toggle ON
    #                         and OFF.
    #   _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # Format string; the single %d slot receives a port number.
    _RPC_PROXY_URL = 'http://localhost:%d'

    # Comma separated global_config value (CROS.rpm_recovery_boards),
    # split into a list of strings.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value(
            'CROS', 'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                           'host[0-9]+')
    _LIGHTSENSOR_FILES = [
        'in_illuminance0_input',
        'in_illuminance0_raw',
        'illuminance0_input',
    ]
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'

    # Registries filled in through `label_decorator`: the detector
    # functions, and the label strings those functions can detect.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(
            add_label_detector, _LABEL_FUNCTIONS, _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling cycles to
    # use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used for
    # arguments to the internal _ping_wait_for_status() method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.
    #
    #   POWER_CONTROL_RPM:    Passed as default arg for
    #                         power_off/on/cycle() methods.
    #   POWER_CONTROL_SERVO:  Used in set_power() and power_cycle()
    #                         methods.
    #   POWER_CONTROL_MANUAL: Used in set_power() and power_cycle()
    #                         methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (
        POWER_CONTROL_RPM,
        POWER_CONTROL_SERVO,
        POWER_CONTROL_MANUAL,
    )

    _RPM_OUTLET_CHANGED = 'outlet_changed'
156
beeps687243d2013-07-18 15:29:27 -0700157
@staticmethod
def get_servo_arguments(args_dict):
    """Pick the servo options out of `args_dict` and return them.

    Only the 'servo_host' and 'servo_port' keys are considered; each
    one present in `args_dict` is copied to the result and every other
    key is ignored.  The intent is to provide standard argument
    processing from run_remote_tests for tests that require a servo to
    operate.

    Recommended usage:
    ~~~~~~~~
        args_dict = utils.args_to_dict(args)
        servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
        host = hosts.create_host(machine, servo_args=servo_args)
    ~~~~~~~~

    @param args_dict Dictionary from which to extract the servo
      arguments.

    @returns A new dictionary holding the servo entries that were
      found; empty (never None) when there are none.
    """
    servo_keys = ('servo_host', 'servo_port')
    return dict((key, args_dict[key])
                for key in servo_keys if key in args_dict)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700183
J. Richard Barnette964fba02012-10-24 17:34:29 -0700184
def _initialize(self, hostname, servo_args=None, ssh_verbosity_flag='',
                ssh_options='',
                *args, **dargs):
    """Initialize superclasses, the servo host, and |self.servo|.

    A servo host object is always requested from _create_servo_host()
    and stored in `self._servo_host` (it may be None).  A servo object
    is created only when the test asked for one, which is signalled by
    `servo_args` being anything other than None; otherwise
    `self.servo` stays None.

    @param hostname: Host name, forwarded to the superclass.
    @param servo_args: None when no servo is required, otherwise a
                       dictionary (possibly empty) of servo options as
                       returned by get_servo_arguments().
    @param ssh_verbosity_flag: Stored as self._ssh_verbosity_flag.
    @param ssh_options: Stored as self._ssh_options.
    @param args: Extra positional arguments for the superclass.
    @param dargs: Extra keyword arguments for the superclass.
    """
    super(CrosHost, self)._initialize(hostname=hostname, *args, **dargs)

    # self.env is a dictionary of environment variable settings to be
    # exported for commands run on the host.  LIBC_FATAL_STDERR_ can be
    # useful for diagnosing certain errors that might happen.
    self.env['LIBC_FATAL_STDERR_'] = '1'

    self._rpc_proxy_map = {}
    self._ssh_verbosity_flag = ssh_verbosity_flag
    self._ssh_options = ssh_options
    self.servo = None

    # TODO(fdeng): We need to simplify the process of servo and
    # servo_host initialization.  crbug.com/298432
    self._servo_host = self._create_servo_host(servo_args)

    # TODO(fdeng): 'servo_args is not None' is used to determine whether
    # a test needs a servo.  Better solution is needed.
    #
    # servo_args comes in one of three shapes:
    #   1. None
    #   2. an empty dictionary
    #   3. a dictionary with 'servo_host' and, optionally, 'servo_port'
    # Assuming that a test requiring a servo always passes the return
    # value of get_servo_arguments(), and that get_servo_arguments()
    # never returns None, only shapes 2 and 3 mean a servo is required.
    if servo_args is None:
        return
    self.servo = self._servo_host.create_healthy_servo_object()
233
234
def _create_servo_host(self, servo_args):
    """Create a ServoHost object for this DUT, if one applies.

    The decision is made in this order:
    1) If the servo host name derived from this DUT's hostname is in
       the lab zone, return a ServoHost pointing at that name;
       servo_args is ignored.
    2) Otherwise, if servo_args is not None (an empty dictionary
       counts), return a ServoHost built from servo_args.
    3) Otherwise return None.

    @param servo_args: None, or a dictionary of keyword arguments for
                       the ServoHost constructor,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.

    @returns: A ServoHost object or None, as described above.

    """
    lab_servo_name = servo_host.make_servo_hostname(self.hostname)
    if utils.host_is_in_lab_zone(lab_servo_name):
        return servo_host.ServoHost(servo_host=lab_servo_name)
    if servo_args is None:
        return None
    return servo_host.ServoHost(**servo_args)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700262
263
def get_repair_image_name(self):
    """Build the repair image name from global config and the AFE board.

    Reads CROS.stable_cros_version and CROS.stable_build_pattern from
    the global config, then fills the pattern with
    (board, stable_version).

    @returns a str of $board-version/$BUILD.

    @raises error.AutoservError if the AFE reports no board for the DUT.

    """
    config = global_config.global_config
    stable_version = config.get_config_value('CROS', 'stable_cros_version')
    build_pattern = config.get_config_value('CROS', 'stable_build_pattern')

    board = self._get_board_from_afe()
    if board is None:
        raise error.AutoservError('DUT has no board attribute, '
                                  'cannot be repaired.')

    return build_pattern % (board, stable_version)
279
280
def _host_in_AFE(self):
    """Ask the AFE for the host records matching this hostname.

    Callers use the truthiness of the result to decide whether the AFE
    knows this host.

    @returns whatever self._AFE.get_hosts() returns for this hostname.
    """
    matching_hosts = self._AFE.get_hosts(hostname=self.hostname)
    return matching_hosts
287
288
def lookup_job_repo_url(self):
    """Look up the job_repo_url attribute of this host in the AFE.

    The AFE is queried once, through _host_in_AFE(); the attribute is
    read from the first host record returned.

    @returns the job_repo_url stored for the host, or None when the AFE
             does not know the host or the host has no job_repo_url
             attribute.
    """
    hosts = self._host_in_AFE()
    if not hosts:
        return None

    attributes = hosts[0].attributes
    if ds_constants.JOB_REPO_URL in attributes:
        return attributes[ds_constants.JOB_REPO_URL]
    return None
Chris Sosab76e0ee2013-05-22 16:55:41 -0700302
303
def clear_cros_version_labels_and_job_repo_url(self):
    """Clear cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise the
    host is removed from every label whose name starts with
    ds_constants.VERSION_PREFIX, and job_repo_url is reset through
    update_job_repo_url(None, None).
    """
    if not self._host_in_AFE():
        return

    this_host = [self.hostname]
    version_labels = self._AFE.get_labels(
            name__startswith=ds_constants.VERSION_PREFIX,
            host__hostname=self.hostname)
    for version_label in version_labels:
        version_label.remove_hosts(hosts=this_host)

    self.update_job_repo_url(None, None)
318
319
def update_job_repo_url(self, devserver_url, image_name):
    """
    Update the job_repo_url host attribute and assert its value.

    The attribute is set to the package url built from `devserver_url`
    and `image_name`, or to None when either of them is falsy.  The
    attribute is then read back to confirm the write took effect.

    @param devserver_url: The devserver to use in the job_repo_url.
    @param image_name: The name of the image to use in the job_repo_url.

    @raises AutoservError: If we failed to update the job_repo_url.
    """
    if devserver_url and image_name:
        repo_url = tools.get_package_url(devserver_url, image_name)
    else:
        repo_url = None

    self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
                                 hostname=self.hostname)

    # Read the attribute back; a mismatch means the update did not stick.
    stored_url = self.lookup_job_repo_url()
    if stored_url != repo_url:
        raise error.AutoservError('Failed to update job_repo_url with %s, '
                                  'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500337
338
def add_cros_version_labels_and_job_repo_url(self, image_name):
    """Add cros_version labels and host attribute job_repo_url.

    Does nothing when the AFE does not know this host.  Otherwise a
    devserver is resolved for the image, the version label for the
    image is looked up (and created if it does not exist yet), this
    host is added to it, and job_repo_url is updated to match.

    @param image_name: The name of the image e.g.
            lumpy-release/R27-3837.0.0

    """
    if not self._host_in_AFE():
        return

    cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
    devserver_url = dev_server.ImageServer.resolve(image_name).url()

    existing_labels = self._AFE.get_labels(name=cros_label)
    if existing_labels:
        version_label = existing_labels[0]
    else:
        version_label = self._AFE.create_label(name=cros_label)
    version_label.add_hosts([self.hostname])

    self.update_job_repo_url(devserver_url, image_name)
360
361
def verify_job_repo_url(self, tag=''):
    """
    Make sure job_repo_url of this host is valid.

    Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
    lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
    autotest package for lumpy-release/R29-4279.0.0.  The devserver and
    build are parsed out of the url and the devserver is asked to stage
    the 'autotest' artifact for that build.  The time spent staging is
    reported as a 'verify_job_repo_url' gauge keyed by board,
    build type, branch and devserver host.

    If the host has no job_repo_url, a warning is logged and nothing
    else happens.  This method itself does not pick a different
    devserver or rewrite the host attribute.

    @param tag: The tag from the server job, in the format
                <job_id>-<user>/<hostname>, or <hostless> for a server
                job.  Currently unused by this method.

    NOTE(review): exceptions raised by the devserver helpers (the
    previous docstring listed DevServerException, AutoservError and
    urllib2.URLError) are not caught here and propagate to the caller;
    confirm the exact set against dev_server.
    """
    job_repo_url = self.lookup_job_repo_url()
    if not job_repo_url:
        logging.warning('No job repo url set on host %s', self.hostname)
        return

    logging.info('Verifying job repo url %s', job_repo_url)
    devserver_url, image_name = tools.get_devserver_build_from_package_url(
            job_repo_url)

    ds = dev_server.ImageServer(devserver_url)

    logging.info('Staging autotest artifacts for %s on devserver %s',
                 image_name, ds.url())

    start_time = time.time()
    ds.stage_artifacts(image_name, ['autotest'])
    stage_time = time.time() - start_time

    # Record how much of the verification time comes from a devserver
    # restage.  If we're doing things right we should not see multiple
    # devservers for a given board/build/branch path.
    try:
        board, build_type, branch = site_utils.ParseBuildName(
                image_name)[:3]
    except site_utils.ParseBuildNameException:
        # Stats are best effort: an unparsable build name must not fail
        # the verification, but leave a trace for debugging.
        logging.debug('Not recording staging stats; cannot parse build '
                      'name %s', image_name)
        return

    # Reduce 'scheme://host[:port][/path]' to the bare host.  Splitting
    # (rather than slicing between the first '/' and the last ':')
    # keeps the host intact when the url carries no explicit port.
    host_and_port = devserver_url.split('//', 1)[-1].split('/', 1)[0]
    devserver = host_and_port.rsplit(':', 1)[0]

    stats_key = {
        'board': board,
        'build_type': build_type,
        'branch': branch,
        # Dots separate components in the stats key, so they cannot
        # appear inside the host name.
        'devserver': devserver.replace('.', '_'),
    }
    stats.Gauge('verify_job_repo_url').send(
            '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
            stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700423
Scott Zawalskieadbf702013-03-14 09:23:06 -0400424
def _try_stateful_update(self, update_url, force_update, updater):
    """Try to use stateful update to initialize DUT.

    When DUT is already running the same version that machine_install
    tries to install, stateful update is a much faster way to clean up
    the DUT for testing, compared to a full reimage.  It is implemented
    by calling updater.run_update, but skipping updating root, as
    updating the kernel is time consuming and not necessary.

    A marker file is dropped into each folder that the stateful update
    is expected to rebuild.  After the reboot, a marker that still
    exists means the folder was not rebuilt, and the update is reported
    as failed.

    @param update_url: url of the image.  Not used by this method; the
                       updater already carries it.
    @param force_update: Set to True to update the image even if the DUT
                         is running the same version.
    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @returns: True if the DUT was updated with stateful update.

    """
    if not updater.check_version():
        return False
    if not force_update:
        logging.info('Canceling stateful update because the new and '
                     'old versions are the same.')
        return False

    # Following folders should be rebuilt after stateful update.
    # A test file is used to confirm each folder gets rebuilt after
    # the stateful update.
    folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
    test_file = '.test_file_to_be_deleted'
    marker_paths = [os.path.join(folder, test_file)
                    for folder in folders_to_check]
    for marker_path in marker_paths:
        self.run('touch %s' % marker_path)

    if not updater.run_update(force_update=True, update_root=False):
        return False

    # Reboot to complete stateful update.
    self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    # The command must be `test -f` alone: appending `; echo $?` makes
    # the reported exit status that of echo (always 0) and hides the
    # result of the check.
    check_file_cmd = 'test -f %s'
    for marker_path in marker_paths:
        result = self.run(check_file_cmd % marker_path,
                          ignore_status=True)
        # Exit status 0 means the marker survived the update, i.e. the
        # folder was not rebuilt.
        if result.exit_status == 0:
            logging.info('Stateful update did not rebuild the folder '
                         'holding %s.', marker_path)
            return False
    return True
469
470
def _post_update_processing(self, updater, expected_kernel=None):
    """After the DUT is updated, confirm machine_install succeeded.

    Three things are checked, in order: that the active kernel is the
    expected one (no rollback), that the installed build is the one
    requested, and that the active kernel gets marked good within
    _KERNEL_UPDATE_TIMEOUT seconds.

    @param updater: ChromiumOSUpdater instance used to update the DUT.
    @param expected_kernel: kernel expected to be active after reboot,
            or `None` to skip rollback checking.

    @raises autoupdater.ChromiumOSError if any of the checks fails.

    """
    # Touch the lab machine file to leave a marker that distinguishes
    # this image from other test images.  Afterwards, we must re-run
    # the autoreboot script because it depends on the
    # _LAB_MACHINE_FILE.
    self.run('touch %s' % self._LAB_MACHINE_FILE)
    self.run('start autoreboot')

    # Figure out the newly active kernel.
    active_kernel, _ = updater.get_kernel_state()

    # Check for rollback due to a bad build.
    if expected_kernel and active_kernel != expected_kernel:
        # Print out some information to make it easier to debug the
        # rollback.
        logging.debug('Dumping partition table.')
        self.run('cgpt show $(rootdev -s -d)')
        logging.debug('Dumping crossystem for firmware debugging.')
        self.run('crossystem --all')
        rollback_details = (updater.update_version, self.hostname)
        raise autoupdater.ChromiumOSError(
                'Build %s failed to boot on %s; system rolled back '
                'to previous build' % rollback_details)

    # Check that we've got the build we meant to install.
    if not updater.check_version_to_confirm_install():
        mismatch_details = (self.hostname,
                            updater.update_version,
                            updater.get_build_id())
        raise autoupdater.ChromiumOSError(
                'Failed to update %s to build %s; found build '
                '%s instead' % mismatch_details)

    def kernel_marked_good():
        """True once the active kernel has no tries left and is good."""
        return (updater.get_kernel_tries(active_kernel) == 0
                and updater.get_kernel_success(active_kernel))

    # Make sure chromeos-setgoodkernel runs.
    try:
        utils.poll_for_condition(
                kernel_marked_good,
                exception=autoupdater.ChromiumOSError(),
                timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
    except autoupdater.ChromiumOSError:
        # Tell the two likely causes apart by the state of the
        # system-services job.
        services_status = self.run('status system-services').stdout
        if services_status != 'system-services start/running\n':
            event = 'Chrome failed to reach login screen'
        else:
            event = ('update-engine failed to call '
                     'chromeos-setgoodkernel')
        raise autoupdater.ChromiumOSError(
                'After update and reboot, %s '
                'within %d seconds' % (event,
                                       self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800528
529
def _stage_image_for_update(self, image_name=None):
    """Stage a build on a devserver and return the update_url.

    When no image name is given, the repair image name from
    get_repair_image_name() is used.  The devserver download is
    triggered with synchronous=False.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns an update URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for AU: %s', build)

    devserver = dev_server.ImageServer.resolve(build)
    devserver.trigger_download(build, synchronous=False)

    url_pattern = tools.image_url_pattern()
    return url_pattern % (devserver.url(), build)
543
544
def stage_image_for_servo(self, image_name=None):
    """Stage a test image on a devserver and return its URL.

    When no image name is given, the repair image name from
    get_repair_image_name() is used.  The 'test_image' artifact is
    staged on the devserver resolved for the build.

    @param image_name: a name like lumpy-release/R27-3837.0.0
    @returns the URL the devserver reports for the test image of the
        build (the result of get_test_image_url()).
    """
    build = image_name or self.get_repair_image_name()
    logging.info('Staging build for servo install: %s', build)

    devserver = dev_server.ImageServer.resolve(build)
    devserver.stage_artifacts(build, ['test_image'])
    return devserver.get_test_image_url(build)
558
559
def stage_factory_image_for_servo(self, image_name):
    """Stage a factory build on a devserver and return its URL.

    The artifact to stage is named by the CROS.factory_artifact entry
    of the global config.

    @param image_name: a name like <board>/4262.204.0

    @return: An update URL, eg:
        http://<devserver>/static/canary-channel/\
        <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
        None (after logging an error) when image_name is empty.

    @raises: ValueError if the factory artifact name is missing from
             the config.

    """
    if not image_name:
        logging.error('Need an image_name to stage a factory image.')
        return

    config = global_config.global_config
    factory_artifact = config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    if not factory_artifact:
        raise ValueError('Cannot retrieve the factory artifact name from '
                         'autotest config, and hence cannot stage factory '
                         'artifacts.')

    logging.info('Staging build for servo install: %s', image_name)
    devserver = dev_server.ImageServer.resolve(image_name)
    devserver.stage_artifacts(image_name,
                              [factory_artifact],
                              archive_url=None)

    url_pattern = tools.factory_image_url_pattern()
    return url_pattern % (devserver.url(), image_name)
592
593
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).

    The update url is resolved in this order:
      1. The `update_url` argument, if given.
      2. The image requested on the autoserv command line
         (`self._parser.options.image`); used directly if it starts with
         'http://', otherwise staged via `_stage_image_for_update`.
      3. If `repair` is True, whatever `_stage_image_for_update()`
         stages when called without a build.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None and repair is True we will install the
            stable image listed in global_config under
            CROS.stable_cros_version.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @raises autoupdater.ChromiumOSError if no update url can be resolved,
            or if after a full update the inactive kernel has a lower
            priority than the active kernel.

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            # No explicit build requested: stage the default repair image.
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        # Best effort: any failure here is logged and we fall through to
        # the full update below (`updated` is still False).
        logging.exception(e)
        logging.warn('Failed to stateful update DUT, force to update.')

    # Stays None unless a full update runs; passed to post-update
    # processing either way.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('touch /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        # Restore the labels/attribute cleared above, now for the new image.
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    # Clean up any old autotest directories which may be lying around.
    for path in global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list):
        self.run('rm -rf ' + path)
712
713
def show_update_engine_log(self):
    """Dump the update engine log by running `cat` on it on the host."""
    log_file = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_file)
    dump_cmd = 'cat %s' % log_file
    self.run(dump_cmd)
718
719
def _get_board_from_afe(self):
    """Look up this host's board using its AFE labels.

    Delegates to `server_utils.get_board_from_afe`. The board is
    expected to come from a host label of the form "board:<board>";
    `None` is returned when there is not a single, unique label
    matching that pattern.

    @returns board from label, or `None`.
    """
    hostname = self.hostname
    afe = self._AFE
    return server_utils.get_board_from_afe(hostname, afe)
Simran Basi833814b2013-01-29 13:13:43 -0800730
731
def get_build(self):
    """Look up the current build of this Host in the AFE.

    Delegates to `server_utils.get_build_from_afe`, which inspects this
    host's AFE labels to determine its build.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    hostname = self.hostname
    afe = self._AFE
    return server_utils.get_build_from_afe(hostname, afe)
Richard Barnette82c35912012-11-20 10:09:10 -0800741
742
def _install_repair(self):
    """Try to repair this host by reinstalling it via update-engine.

    If the host is up, call `machine_install(repair=True)`, which
    installs the stable "repair" version of Chrome OS as defined in the
    global_config under CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    host_is_up = self.is_up()
    if not host_is_up:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_failure:
        # Log the failure for diagnosis, then let the caller see it.
        logging.exception(install_failure)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500764
765
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """Re-install the OS on the DUT through its Servo.

    The steps are:
    1) install a test image on the USB storage device attached to the
       Servo board,
    2) boot that image in recovery mode, and then
    3) install the image with chromeos-install and power cycle the DUT.

    The USB boot and the chromeos-install steps are each timed with a
    `stats.Timer` keyed by the timeout in use; a timer is only stopped
    when its step succeeds.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.
    """
    boot_stat_name = ('servo_install.usb_boot_timeout_%s'
                      % usb_boot_timeout)
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = stats.Timer(boot_stat_name)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    install_stat_name = ('servo_install.install_timeout_%s'
                         % install_timeout)
    install_timer = stats.Timer(install_stat_name)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes', timeout=install_timeout)
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()

    logging.info('Waiting for DUT to come back up.')
    came_back = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not came_back:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
819
820
def _servo_repair_reinstall(self):
    """Repair the DUT by reinstalling a test image through servo.

    Stages an image with `stage_image_for_servo()` and hands its url to
    `servo_install()`, which:
    1) installs the test image on a USB storage device attached to the
       Servo board,
    2) boots that image in recovery mode, and then
    3) installs the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA(
                'Repair Reinstall NA: DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_image_url = self.stage_image_for_servo()
    self.servo_install(staged_image_url)
843
844
def _servo_repair_power(self):
    """Attempt to repair the DUT by power cycling it through Servo.

    Powers the DUT off and back on using the servo's power state
    controller, then waits up to BOOT_TIMEOUT for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA(
                'Repair Power NA: DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    # The controller is fetched separately for each operation.
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()
    if not self.wait_up(self.BOOT_TIMEOUT):
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800866
867
Richard Barnette82c35912012-11-20 10:09:10 -0800868 def _powercycle_to_repair(self):
869 """Utilize the RPM Infrastructure to bring the host back up.
870
871 If the host is not up/repaired after the first powercycle we utilize
872 auto fallback to the last good install by powercycling and rebooting the
873 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -0500874
875 @raises AutoservRepairMethodNA if the device does not support remote
876 power.
877 @raises AutoservRepairFailure if the repair fails for any reason.
878
Richard Barnette82c35912012-11-20 10:09:10 -0800879 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500880 if not self.has_power():
881 raise error.AutoservRepairMethodNA('Device does not support power.')
882
Richard Barnette82c35912012-11-20 10:09:10 -0800883 logging.info('Attempting repair via RPM powercycle.')
884 failed_cycles = 0
885 self.power_cycle()
886 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
887 failed_cycles += 1
888 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -0500889 raise error.AutoservRepairFailure(
890 'Powercycled host %s %d times; device did not come back'
891 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -0800892 self.power_cycle()
893 if failed_cycles == 0:
894 logging.info('Powercycling was successful first time.')
895 else:
896 logging.info('Powercycling was successful after %d failures.',
897 failed_cycles)
898
899
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If a servo host is attached, a healthy servo object is created
    first. The following procedures are then attempted in order, and
    the first one that is followed by a successful `self.verify()`
    ends the repair:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If there's a servo for the DUT, try to re-install via
         the servo.
      4. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Each attempt's outcome (SUCCEEDED, RepairNA or FAILED) is recorded
    in a stats counter named after the repair function, plus a
    per-board counter when the board is known.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.
    @raises ServoHostRepairTotalFailure if the repair process fails to
            fix the servo host if one is attached to the DUT.
    @raises AutoservSshPermissionDeniedError if it is unable
            to ssh to the servo host due to permission error.

    """
    if self._servo_host:
        self.servo = self._servo_host.create_healthy_servo_object()

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [self._install_repair, self._servo_repair_power,
                    self._servo_repair_reinstall,
                    self._powercycle_to_repair]
    errors = []
    board = self._get_board_from_afe()

    def count(stat_name):
        """Bump `stat_name` and, if the board is known, `stat_name.<board>`."""
        stats.Counter(stat_name).increment()
        if board:
            stats.Counter('%s.%s' % (stat_name, board)).increment()

    for repair_func in repair_funcs:
        func_name = repair_func.__name__
        try:
            repair_func()
            self.verify()
            count('%s.SUCCEEDED' % func_name)
            return
        except error.AutoservRepairMethodNA as e:
            # The method does not apply to this DUT; move on to the next.
            count('%s.RepairNA' % func_name)
            logging.warn('Repair function NA: %s', e)
            errors.append(str(e))
        except Exception as e:
            # Covers both a failed repair and a failed verify() after it.
            count('%s.FAILED' % func_name)
            logging.warn('Failed to repair device: %s', e)
            errors.append(str(e))

    count('Full_Repair_Failed')
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(errors))
Richard Barnette82c35912012-11-20 10:09:10 -0800983
984
def close(self):
    """Disconnect all rpc connections, then run the parent class close()."""
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700988
989
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE knows this host and the host carries the
    _RPM_OUTLET_CHANGED attribute. Otherwise the host is powered on
    (RPM failures are logged, not raised) and the attribute is cleared.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe_hosts = afe.get_hosts(hostname=self.hostname)
    if not afe_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in afe_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    # Clear the marker whether or not power_on() succeeded.
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
1008
1009
beepsc87ff602013-07-31 21:53:00 -07001010 def _is_factory_image(self):
1011 """Checks if the image on the DUT is a factory image.
1012
1013 @return: True if the image on the DUT is a factory image.
1014 False otherwise.
1015 """
1016 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1017 return result.exit_status == 0
1018
1019
def _restart_ui(self):
    """Restart the ui job on the DUT and wait for the login prompt.

    The login prompt state is cleared before `restart ui` is run and
    waited for afterwards, both through static methods of the client
    side cros_ui module.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException(
                'Cannot restart ui on factory images')

    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    autotest_client = autotest.Autotest(self)
    autotest_client.run_static_method(cros_ui_module,
                                      '_clear_login_prompt_state')
    self.run('restart ui')
    autotest_client.run_static_method(cros_ui_module,
                                      '_wait_for_login_prompt')
1037 '_wait_for_login_prompt')
1038
1039
def cleanup(self):
    """Clean up the DUT after a job.

    Removes the "cleanup logs paused" marker file, then restarts the ui.
    If the ui cannot be restarted, falls back to the parent class
    cleanup(). Finally, for hosts with remote power, makes sure power
    is restored.
    """
    paused_marker = constants.CLEANUP_LOGS_PAUSED_FILE
    self.run('rm -f %s' % paused_marker)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warn('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    host_has_power = self.has_power()
    if host_has_power:
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001053
1054
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    Unless the caller supplies them, `reboot_cmd` and `fastsync` are
    filled in before delegating to the parent class reboot().

    @param dargs: Keyword arguments forwarded to the parent reboot().
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001070
1071
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. update_engine answers a simple status request (skipped on
        factory images).
     5. python on the DUT can import cPickle.

    """
    super(CrosHost, self).verify_software()

    # (mount point, SERVER config key, default required space in GB)
    space_requirements = (
        ('/mnt/stateful_partition',
         'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for mount_point, config_key, default_gb in space_requirements:
        required_gb = global_config.global_config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(mount_point, required_gb)

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    running_factory_image = self._is_factory_image()
    if not running_factory_image:
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001103
1104
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line, with options tuned for Chrome OS.

    Overrides the default make_ssh_command. The fixed `-o` options are:
      - StrictHostKeyChecking=no and UserKnownHostsFile=/dev/null; we
        don't care about the keys. Host keys change with every new
        installation, don't waste memory/space saving them.

      - BatchMode=yes.

      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure. Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
        180 seconds. In conjunction with ServerAliveCountMax ensures
        that if the connection dies, Autotest will bail out quickly.
        Originally tried 60 secs, but saw frequent job ABORTS where
        the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - Protocol=2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command line as a string, without the hostname.
    """
    fixed_options = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    option_args = ''.join(' -o %s' % option for option in fixed_options)
    template = '/usr/bin/ssh -a -x %s %s %s' + option_args + ' -l %s -p %d'
    return template % (self._ssh_verbosity_flag, self._ssh_options,
                       opts, user, port)
Fang Deng96667ca2013-08-01 17:46:18 -07001145
1146
def _create_ssh_tunnel(self, port, local_port):
    """Start an ssh process forwarding local_port to port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding_opts),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel.pid)
    return tunnel
1169
1170
def _setup_rpc(self, port, command_name):
    """Set up a tunnel process and record it in the rpc proxy map.

    Any existing rpc connection to `port` is disconnected first, so
    there is at most one tunnel per remote port. This assumes that
    xmlrpc and jsonrpc never conflict, since only one kind of server
    can be listening on a given remote port. If one starts a jsonrpc
    proxy/server from port A->B and then an xmlrpc proxy forwarded to
    the same port, the xmlrpc proxy replaces the jsonrpc tunnel
    process, however:

    1. None of the methods on the xmlrpc proxy will work because
       the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
       server, as the only use case currently is goofy, which is tied to
       the factory image. It is much easier to handle a failed xmlrpc
       call on the client than it is to terminate goofy in this scenario,
       as doing the latter might leave the DUT in a hard to recover state.

    Because newer connections terminate the tunnel processes of older
    ones to the same remote port, methods invoked on an older client
    afterwards will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.

    @return A url that we can use to initiate the rpc connection.
    """
    self.rpc_disconnect(port)
    free_local_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, free_local_port)
    # Remembered so a later disconnect can find the process and tunnel.
    self._rpc_proxy_map[port] = (command_name, tunnel)
    rpc_url = self._RPC_PROXY_URL % free_local_port
    return rpc_url
1206
1207
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready, retrying on socket.error,
    xmlrpclib.ProtocolError and httplib.BadStatusLine for up to
    timeout_seconds.  If the server still is not ready, the rpc
    connection is disconnected and the error is propagated.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'  An error is raised if
        the server is not ready in time.
        NOTE(review): the exact exception type comes from
        retry.retry and is not visible here (earlier docs named
        both TestError and TestFail) - confirm.

    @returns The xmlrpclib.ServerProxy for the server.

    """
    rpc_url = self._setup_rpc(port, command_name)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    remote_cmd = '( %s ) </dev/null >/dev/null 2>&1 & echo $!' % command
    try:
        # `echo $!` prints the pid of the backgrounded server.
        remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
    except Exception as e:
        # The server never started; tear down the tunnel set up above.
        self.rpc_disconnect(port)
        raise

    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        @retry.retry((socket.error,
                      xmlrpclib.ProtocolError,
                      httplib.BadStatusLine),
                     timeout_min=timeout_seconds/60.0,
                     delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()
        # Flag + finally (rather than except) so cleanup runs for any
        # kind of failure, and the original exception propagates.
        successful = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            successful = True
        finally:
            if not successful:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001288
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001289
def jsonrpc_connect(self, port):
    """Creates a jsonrpc proxy connection through an ssh tunnel.

    This method exists to talk to goofy (the default system manager on
    factory images), and leaves most of the rpc server sanity checking
    to the caller.  Unlike xmlrpc_connect, no remote server is started
    here: the only clients are factory tests, where goofy is built in to
    the image and starts when the target boots.

    Several jsonrpc proxies could in theory be forwarded to the same
    remote port, but that risks leaking an existing tunnel process, so
    older tunnels are always disconnected through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None when jsonrpclib is unavailable.
    """
    if jsonrpclib:
        # Resolve the proxy class before the tunnel is set up, so a
        # broken jsonrpclib fails before any tunnel is created.
        proxy_factory = jsonrpclib.jsonrpc.ServerProxy
        rpc_url = self._setup_rpc(port, None)
        client = proxy_factory(rpc_url)

        logging.info('Established a jsonrpc connection through port %s.',
                     port)
        return client

    logging.warning('Jsonrpclib could not be imported. Check that '
                    'site-packages contains jsonrpclib.')
    return None
1320
1321
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    The local tunnel is torn down and the port is forgotten even
    when killing the remote server raises; the exception is then
    propagated to the caller.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    """
    if port not in self._rpc_proxy_map:
        return
    entry = self._rpc_proxy_map[port]
    remote_name = entry[0]
    tunnel_proc = entry[1]
    try:
        if remote_name:
            # We use 'pkill' to find our target process rather than
            # a PID, because the host may have rebooted since
            # connecting, and we don't want to kill an innocent
            # process with the same PID.
            #
            # 'pkill' helpfully exits with status 1 if no target
            # process is found, for which run() will throw an
            # exception.  We don't want that, so we ignore the
            # status.
            self.run("pkill -f '%s'" % remote_name, ignore_status=True)
    finally:
        # run() can still raise for reasons other than exit status
        # (ignore_status only covers the command's return code).
        # Clean up the local side regardless, so the tunnel process
        # and its map entry are not leaked.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)
        del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001362
1363
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # Iterate over a snapshot of the ports: rpc_disconnect() removes
    # entries from _rpc_proxy_map, and deleting from a dict while
    # iterating a live key view raises RuntimeError.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001368
1369
def _ping_check_status(self, status):
    """Ping the host a single time and compare against `status`.

    A ping return value of 0 is treated as "the host answered".

    @param status Expected ping status to compare against.
    @return True iff `status` agrees with the ping outcome
            (i.e. both True or both False).

    """
    ping_val = utils.ping(self.hostname, tries=1, deadline=1)
    host_answered = (ping_val == 0)
    disagrees = status ^ host_answered
    return not disagrees
1380
1381 def _ping_wait_for_status(self, status, timeout):
1382 """Wait for the host to have a given status (UP or DOWN).
1383
1384 Status is checked by polling. Polling will not last longer
1385 than the number of seconds in `timeout`. The polling
1386 interval will be long enough that only approximately
1387 _PING_WAIT_COUNT polling cycles will be executed, subject
1388 to a maximum interval of about one minute.
1389
1390 @param status Waiting will stop immediately if `ping` of the
1391 host returns this status.
1392 @param timeout Poll for at most this many seconds.
1393 @return True iff the host status from `ping` matched the
1394 requested status at the time of return.
1395
1396 """
1397 # _ping_check_status() takes about 1 second, hence the
1398 # "- 1" in the formula below.
1399 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1400 end_time = time.time() + timeout
1401 while time.time() <= end_time:
1402 if self._ping_check_status(status):
1403 return True
1404 if poll_interval > 0:
1405 time.sleep(poll_interval)
1406
1407 # The last thing we did was sleep(poll_interval), so it may
1408 # have been too long since the last `ping`. Check one more
1409 # time, just to be sure.
1410 return self._ping_check_status(status)
1411
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This is not a reliable substitute for `wait_up()`: a host
    that answers ping will not necessarily answer ssh.  Use it only
    when the target DUT can be considered functional even if it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted_status = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001427
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This can serve as a slightly faster version of `wait_down()`,
    since it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted_status = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted_status, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001441
def test_wait_for_sleep(self):
    """Wait for the client to enter low-power sleep mode.

    "Is asleep" is detected by the host no longer answering ping,
    which can't be told apart from a system that is powered off.
    To confirm the unit really slept, force a resume and then call
    `test_wait_for_resume()`.

    Expected to be called from a test in a sequence like this:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger sleep on the host
        host.test_wait_for_sleep()
        # trigger resume on the host
        host.test_wait_for_resume(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not go to sleep within
                        the allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SLEEP_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to sleep after %d seconds' %
            self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001468
1469
def test_wait_for_resume(self, old_boot_id):
    """Wait for the client to resume from low-power sleep mode.

    `old_boot_id` should be the value `get_boot_id()` returned
    before the host entered sleep mode.  A changed boot id after
    the host comes back means it rebooted instead of resuming, and
    is reported as a `TestFail`.

    See @ref test_wait_for_sleep for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       target host went to sleep.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated a reboot rather than a sleep
                        cycle.
    """
    if not self.wait_up(timeout=self.RESUME_TIMEOUT):
        raise error.TestFail(
                'client failed to resume from sleep after %d seconds' %
                self.RESUME_TIMEOUT)

    # The host is reachable again; make sure it is still the same boot.
    current_boot_id = self.get_boot_id()
    if current_boot_id != old_boot_id:
        raise error.TestFail(
                'client rebooted, but sleep was expected'
                ' (old boot %s, new boot %s)'
                % (old_boot_id, current_boot_id))
1500
1501
def test_wait_for_shutdown(self):
    """Wait for the client to shut down.

    "Has shut down" is detected by the host no longer answering
    ping, which can't be told apart from a system that is merely
    asleep.  To confirm the unit was really down, force a boot and
    then call test_wait_for_boot().

    Expected to be called from a test in a sequence like this:

    ~~~~~~~~
        boot_id = host.get_boot_id()
        # trigger shutdown on the host
        host.test_wait_for_shutdown()
        # trigger boot on the host
        host.test_wait_for_boot(boot_id)
    ~~~~~~~~

    @exception TestFail The host did not shut down within the
                        allowed time.
    """
    went_down = self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT)
    if went_down:
        return
    raise error.TestFail(
            'client failed to shut down after %d seconds' %
            self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001527
1528
def test_wait_for_boot(self, old_boot_id=None):
    """Wait for the client to boot from cold power.

    `old_boot_id` should be the value `get_boot_id()` returned
    before shutting down.  If the boot id is unchanged once the host
    is back, a `TestFail` is raised.  The boot id comparison is
    skipped when `old_boot_id` is not specified (or is otherwise
    falsy).

    See @ref test_wait_for_shutdown for more on this function's
    usage.

    @param old_boot_id A boot id value obtained before the
                       shut down.

    @exception TestFail The host did not respond within the
                        allowed time.
    @exception TestFail The host responded, but the boot id test
                        indicated that there was no reboot.
    """
    if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
        raise error.TestFail(
                'client failed to reboot after %d seconds' %
                self.REBOOT_TIMEOUT)

    if not old_boot_id:
        # Nothing to compare against; being reachable is enough.
        return

    if self.get_boot_id() == old_boot_id:
        raise error.TestFail(
                'client is back up, but did not reboot'
                ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001558
1559
@staticmethod
def check_for_rpm_support(hostname):
    """For a given hostname, return whether or not it is powered by an RPM.

    The decision is made purely by matching `hostname` against
    CrosHost._RPM_HOSTNAME_REGEX.

    @param hostname: hostname to check for rpm support.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab. If it does follow the format,
            it returns a regular expression MatchObject instead.
    """
    rpm_hostname_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_hostname_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001571
1572
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return The result of CrosHost.check_for_rpm_support() for this
            host's hostname: a truthy match object if the hostname
            follows the defined naming format, None otherwise.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07001580
1581
def _set_power(self, state, power_method):
    """Sets the power to the host via RPM, Servo or manual.

    @param state Specifies which power state to set to DUT; must be
                 'ON' or 'OFF' (compared case-insensitively).
    @param power_method Specifies which method of power control to
                        use. Anything other than the servo or manual
                        method is handled through the RPM. Valid values
                        are the strings "RPM", "manual", "servoj10".

    @exception TestError `state` is not an acceptable state.
    @exception TestFail RPM control was requested but has_power()
                        reports no RPM for this host.

    """
    valid_states = ['ON', 'OFF']
    if state.upper() not in valid_states:
        raise error.TestError('State must be one of: %s.'
                              % (valid_states,))

    if power_method == self.POWER_CONTROL_SERVO:
        logging.info('Setting servo port J10 to %s', state)
        self.servo.set('prtctl3_pwren', state.lower())
        time.sleep(self._USB_POWER_TIMEOUT)
        return

    if power_method == self.POWER_CONTROL_MANUAL:
        # A human flips the power; just give them time to do it.
        logging.info('You have %d seconds to set the AC power to %s.',
                     self._POWER_CYCLE_TIMEOUT, state)
        time.sleep(self._POWER_CYCLE_TIMEOUT)
        return

    # Every remaining method is treated as RPM control.
    if not self.has_power():
        raise error.TestFail('DUT does not have RPM connected.')
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
                           hostname=self.hostname)
    rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07001612
1613
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Delegates to _set_power() with the 'OFF' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001623
1624
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Delegates to _set_power() with the 'ON' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1634
1635
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the servo and manual methods the cycle is done in two steps
    (power_off, wait _POWER_CYCLE_TIMEOUT seconds, power_on).  Any
    other method sends a single 'CYCLE' request through rpm_client.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001651
1652
def get_platform(self):
    """Determine the correct platform label for this host.

    The label is derived from the firmware id reported by crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; only the part before
    # the first '.' identifies the platform.
    firmware_id = crossystem.fwid()
    platform_id = firmware_id.split('.', 1)[0].lower()
    # Newer platforms carry a 'Google_' prefix that older ones lack;
    # strip it so both forms yield the same label.
    return platform_id.replace('google_', '')
1666
1667
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board name is read from CHROMEOS_RELEASE_BOARD in the host's
    /etc/lsb-release and prefixed with ds_constants.BOARD_PREFIX.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, while
    # development devices report {board_name}-signed-{key_type}.
    # Names containing 'x86' (e.g. an 'x86-' prefix) need their first
    # two dash-separated fields kept; everything else keeps only one.
    name_fields = board.split('-')
    fields_to_keep = 2 if 'x86' in board else 1
    return board_format_string % '-'.join(name_fields[:fields_to_keep])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001684
1685
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Searches _LIGHTSENSOR_SEARCH_DIR on the host for any of the file
    names listed in _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    file_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
            search_dir, file_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop makes find print a few messages to stderr even
        # though the command still works.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1, meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
1705
1706
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks for the directory /sys/class/bluetooth/hci0 on the host.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code of 1, meaning the directory
        # did not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1722
1723
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the correct graphics label for this host.

    @returns a string representing this host's graphics. For now a host
    whose `uname -a` output mentions 'arm' gets graphics:gles, and every
    other host gets graphics:gl. This may change over time, but for
    robustness reasons this should avoid executing code in actual
    graphics libraries (which may not be ready and is tested by
    graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
1739
1740
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a
    host: every function in _LABEL_FUNCTIONS is called in order with
    this host, and each truthy result is collected.
    """
    candidates = (make_label(self) for make_label in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]