blob: d71ebe0a307127bb539510f460b441d624d401a2 [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Fang Deng96667ca2013-08-01 17:46:18 -070028from autotest_lib.server.hosts import abstract_ssh
Fang Deng5d518f42013-08-02 14:04:32 -070029from autotest_lib.server.hosts import servo_host
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
beeps32a63082013-08-22 14:02:29 -070034try:
35 import jsonrpclib
36except ImportError:
37 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070038
Fang Dengd1c2b732013-08-20 12:59:46 -070039
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
43
44
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label detecting functions.

    @param label_function_list: List that every decorated function is
                                appended to.
    @param label_list: Optional list that `label` is appended to whenever
                       the decorator is applied. (Default: None)
    @param label: Label string detectable by the decorated function.
                  (Default: None)
    @returns A decorator that records the function and hands it back
             unchanged.
    """
    def register(detector):
        """
        @param detector: The function to be recorded as a detector.
        """
        label_function_list.append(detector)
        # Only record the label when there is both a label and somewhere
        # to put it.
        if not label or label_list is None:
            return detector
        label_list.append(label)
        return detector
    return register
63
64
class CrosHost(abstract_ssh.AbstractSSHHost):
    """Chromium OS specific subclass of Host."""

    # Both objects below are created once, when the class body executes,
    # and are shared by every CrosHost instance.
    _parser = autoserv_parser.autoserv_parser
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # POWERWASH_BOOT_TIMEOUT: NOTE(review): undocumented in the original;
    #   presumably the time to allow for boot after a powerwash - confirm.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    #   NOTE(review): no SHUTDOWN_TIMEOUT constant is defined in this
    #   part of the class - confirm whether it exists elsewhere.
    # REBOOT_TIMEOUT: How long to wait for a reboot.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150
    POWERWASH_BOOT_TIMEOUT = 60

    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    INSTALL_TIMEOUT = 240

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # URL template for an RPC proxy; %d is filled with a port number.
    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = 20

    # The CROS.rpm_recovery_boards config value is a comma separated
    # string; it is read and split into a list when the class is defined.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched on the DUT by _post_update_processing().
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
            'host[0-9]+')
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # Registries filled in by label_decorator: the detection functions
    # themselves and the label strings they can detect.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    # add_label_detector with both registries pre-bound, so only the
    # optional `label` argument remains to be supplied.
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    _RPM_OUTLET_CHANGED = 'outlet_changed'
157
beeps687243d2013-07-18 15:29:27 -0700158
J. Richard Barnette964fba02012-10-24 17:34:29 -0700159 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800160 def get_servo_arguments(args_dict):
161 """Extract servo options from `args_dict` and return the result.
162
163 Take the provided dictionary of argument options and return
164 a subset that represent standard arguments needed to
165 construct a servo object for a host. The intent is to
166 provide standard argument processing from run_remote_tests
167 for tests that require a servo to operate.
168
169 Recommended usage:
170 ~~~~~~~~
171 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700172 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800173 host = hosts.create_host(machine, servo_args=servo_args)
174 ~~~~~~~~
175
176 @param args_dict Dictionary from which to extract the servo
177 arguments.
178 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700179 servo_args = {}
180 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800181 if arg in args_dict:
182 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700183 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700184
J. Richard Barnette964fba02012-10-24 17:34:29 -0700185
Fang Dengd1c2b732013-08-20 12:59:46 -0700186 def _initialize(self, hostname, servo_args=None, ssh_verbosity_flag='',
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700187 ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700188 *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700189 """Initialize superclasses, and |self.servo|.
190
Fang Deng5d518f42013-08-02 14:04:32 -0700191 This method checks whether a servo is required by checking whether
192 servo_args is None. This method will only attempt to create a servo
193 object when servo is required by the test.
194
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700195 For creating the host servo object, there are three
196 possibilities: First, if the host is a lab system known to
197 have a servo board, we connect to that servo unconditionally.
198 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700199 servo features for testing, it will pass settings for
200 `servo_host`, `servo_port`, or both. If neither of these
201 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700202
203 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700204 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700205 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700206 # self.env is a dictionary of environment variable settings
207 # to be exported for commands run on the host.
208 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
209 # errors that might happen.
210 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700211 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700212 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700213 self._ssh_options = ssh_options
Fang Deng5d518f42013-08-02 14:04:32 -0700214 self.servo = None
215 # TODO(fdeng): We need to simplify the
216 # process of servo and servo_host initialization.
217 # crbug.com/298432
218 self._servo_host = self._create_servo_host(servo_args)
219 # TODO(fdeng): 'servo_args is not None' is used to determine whether
220 # a test needs a servo. Better solution is needed.
221 # There are three possible cases here:
222 # 1. servo_arg is None
223 # 2. servo arg is an empty dictionary
224 # 3. servo_arg is a dictionary that has entries of 'servo_host',
225 # 'servo_port'(optional).
226 # We assume that:
227 # a. A test that requires a servo always calls get_servo_arguments
228 # and passes in its return value as |servo_args|.
229 # b. get_servo_arguments never returns None.
230 # Based on the assumptions, we reason that only in case 2 and 3
231 # a servo is required, i.e. when the servo_args is not None.
232 if servo_args is not None:
233 self.servo = self._servo_host.create_healthy_servo_object()
234
235
236 def _create_servo_host(self, servo_args):
237 """Create a ServoHost object.
238
239 There three possible cases:
240 1) If the DUT is in Cros Lab and has a beaglebone and a servo, then
241 create a ServoHost object pointing to the beaglebone. servo_args
242 is ignored.
243 2) If not case 1) and servo_args is neither None nor empty, then
244 create a ServoHost object using servo_args.
245 3) If neither case 1) or 2) applies, return None.
246
247 @param servo_args: A dictionary that contains args for creating
248 a ServoHost object,
249 e.g. {'servo_host': '172.11.11.111',
250 'servo_port': 9999}.
251 See comments above.
252
253 @returns: A ServoHost object or None. See comments above.
254
255 """
256 servo_host_name = servo_host.make_servo_hostname(self.hostname)
257 if utils.host_is_in_lab_zone(servo_host_name):
258 return servo_host.ServoHost(servo_host=servo_host_name)
259 elif servo_args is not None:
260 return servo_host.ServoHost(**servo_args)
261 else:
262 return None
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700263
264
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500265 def get_repair_image_name(self):
266 """Generate a image_name from variables in the global config.
267
268 @returns a str of $board-version/$BUILD.
269
270 """
271 stable_version = global_config.global_config.get_config_value(
272 'CROS', 'stable_cros_version')
273 build_pattern = global_config.global_config.get_config_value(
274 'CROS', 'stable_build_pattern')
275 board = self._get_board_from_afe()
276 if board is None:
277 raise error.AutoservError('DUT has no board attribute, '
278 'cannot be repaired.')
279 return build_pattern % (board, stable_version)
280
281
Scott Zawalski62bacae2013-03-05 10:40:32 -0500282 def _host_in_AFE(self):
283 """Check if the host is an object the AFE knows.
284
285 @returns the host object.
286 """
287 return self._AFE.get_hosts(hostname=self.hostname)
288
289
Chris Sosab76e0ee2013-05-22 16:55:41 -0700290 def lookup_job_repo_url(self):
291 """Looks up the job_repo_url for the host.
292
293 @returns job_repo_url from AFE or None if not found.
294
295 @raises KeyError if the host does not have a job_repo_url
296 """
297 if not self._host_in_AFE():
298 return None
299
300 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700301 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
302 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700303
304
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500305 def clear_cros_version_labels_and_job_repo_url(self):
306 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500307 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400308 return
309
Scott Zawalski62bacae2013-03-05 10:40:32 -0500310 host_list = [self.hostname]
311 labels = self._AFE.get_labels(
312 name__startswith=ds_constants.VERSION_PREFIX,
313 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800314
Scott Zawalski62bacae2013-03-05 10:40:32 -0500315 for label in labels:
316 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500317
beepscb6f1e22013-06-28 19:14:10 -0700318 self.update_job_repo_url(None, None)
319
320
321 def update_job_repo_url(self, devserver_url, image_name):
322 """
323 Updates the job_repo_url host attribute and asserts it's value.
324
325 @param devserver_url: The devserver to use in the job_repo_url.
326 @param image_name: The name of the image to use in the job_repo_url.
327
328 @raises AutoservError: If we failed to update the job_repo_url.
329 """
330 repo_url = None
331 if devserver_url and image_name:
332 repo_url = tools.get_package_url(devserver_url, image_name)
333 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500334 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700335 if self.lookup_job_repo_url() != repo_url:
336 raise error.AutoservError('Failed to update job_repo_url with %s, '
337 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500338
339
Dan Shie9309262013-06-19 22:50:21 -0700340 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400341 """Add cros_version labels and host attribute job_repo_url.
342
343 @param image_name: The name of the image e.g.
344 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700345
Scott Zawalskieadbf702013-03-14 09:23:06 -0400346 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500347 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400348 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500349
Scott Zawalskieadbf702013-03-14 09:23:06 -0400350 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700351 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500352
353 labels = self._AFE.get_labels(name=cros_label)
354 if labels:
355 label = labels[0]
356 else:
357 label = self._AFE.create_label(name=cros_label)
358
359 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700360 self.update_job_repo_url(devserver_url, image_name)
361
362
beepsdae65fd2013-07-26 16:24:41 -0700363 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700364 """
365 Make sure job_repo_url of this host is valid.
366
joychen03eaad92013-06-26 09:55:21 -0700367 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700368 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
369 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
370 download and extract it. If the devserver embedded in the url is
371 unresponsive, update the job_repo_url of the host after staging it on
372 another devserver.
373
374 @param job_repo_url: A url pointing to the devserver where the autotest
375 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700376 @param tag: The tag from the server job, in the format
377 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700378
379 @raises DevServerException: If we could not resolve a devserver.
380 @raises AutoservError: If we're unable to save the new job_repo_url as
381 a result of choosing a new devserver because the old one failed to
382 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700383 @raises urllib2.URLError: If the devserver embedded in job_repo_url
384 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700385 """
386 job_repo_url = self.lookup_job_repo_url()
387 if not job_repo_url:
388 logging.warning('No job repo url set on host %s', self.hostname)
389 return
390
391 logging.info('Verifying job repo url %s', job_repo_url)
392 devserver_url, image_name = tools.get_devserver_build_from_package_url(
393 job_repo_url)
394
beeps0c865032013-07-30 11:37:06 -0700395 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700396
397 logging.info('Staging autotest artifacts for %s on devserver %s',
398 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700399
400 start_time = time.time()
beepscb6f1e22013-06-28 19:14:10 -0700401 ds.stage_artifacts(image_name, ['autotest'])
beeps687243d2013-07-18 15:29:27 -0700402 stage_time = time.time() - start_time
403
404 # Record how much of the verification time comes from a devserver
405 # restage. If we're doing things right we should not see multiple
406 # devservers for a given board/build/branch path.
407 try:
408 board, build_type, branch = site_utils.ParseBuildName(
409 image_name)[:3]
Chris Sosa65425082013-10-16 13:26:22 -0700410 except site_utils.ParseBuildNameException:
beeps687243d2013-07-18 15:29:27 -0700411 pass
412 else:
beeps0c865032013-07-30 11:37:06 -0700413 devserver = devserver_url[
Chris Sosa65425082013-10-16 13:26:22 -0700414 devserver_url.find('/') + 2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700415 stats_key = {
416 'board': board,
417 'build_type': build_type,
418 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700419 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700420 }
421 stats.Gauge('verify_job_repo_url').send(
422 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
423 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700424
Scott Zawalskieadbf702013-03-14 09:23:06 -0400425
Dan Shi0f466e82013-02-22 15:44:58 -0800426 def _try_stateful_update(self, update_url, force_update, updater):
427 """Try to use stateful update to initialize DUT.
428
429 When DUT is already running the same version that machine_install
430 tries to install, stateful update is a much faster way to clean up
431 the DUT for testing, compared to a full reimage. It is implemeted
432 by calling autoupdater.run_update, but skipping updating root, as
433 updating the kernel is time consuming and not necessary.
434
435 @param update_url: url of the image.
436 @param force_update: Set to True to update the image even if the DUT
437 is running the same version.
438 @param updater: ChromiumOSUpdater instance used to update the DUT.
439 @returns: True if the DUT was updated with stateful update.
440
441 """
442 if not updater.check_version():
443 return False
444 if not force_update:
445 logging.info('Canceling stateful update because the new and '
446 'old versions are the same.')
447 return False
448 # Following folders should be rebuilt after stateful update.
449 # A test file is used to confirm each folder gets rebuilt after
450 # the stateful update.
451 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
452 test_file = '.test_file_to_be_deleted'
453 for folder in folders_to_check:
454 touch_path = os.path.join(folder, test_file)
455 self.run('touch %s' % touch_path)
456
457 if not updater.run_update(force_update=True, update_root=False):
458 return False
459
460 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700461 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800462 check_file_cmd = 'test -f %s; echo $?'
463 for folder in folders_to_check:
464 test_file_path = os.path.join(folder, test_file)
465 result = self.run(check_file_cmd % test_file_path,
466 ignore_status=True)
467 if result.exit_status == 1:
468 return False
469 return True
470
471
J. Richard Barnette7275b612013-06-04 18:13:11 -0700472 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800473 """After the DUT is updated, confirm machine_install succeeded.
474
475 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700476 @param expected_kernel: kernel expected to be active after reboot,
477 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800478
479 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700480 # Touch the lab machine file to leave a marker that
481 # distinguishes this image from other test images.
482 # Afterwards, we must re-run the autoreboot script because
483 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800484 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800485 self.run('start autoreboot')
Chris Sosa65425082013-10-16 13:26:22 -0700486 updater.verify_boot_expectations(
487 expected_kernel, rollback_message=
488 'Build %s failed to boot on %s; system rolled back to previous'
489 'build' % (updater.update_version, self.hostname))
J. Richard Barnette7275b612013-06-04 18:13:11 -0700490 # Check that we've got the build we meant to install.
491 if not updater.check_version_to_confirm_install():
492 raise autoupdater.ChromiumOSError(
493 'Failed to update %s to build %s; found build '
494 '%s instead' % (self.hostname,
Chris Sosa65425082013-10-16 13:26:22 -0700495 updater.update_version,
496 updater.get_build_id()))
Dan Shi0f466e82013-02-22 15:44:58 -0800497
498
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700499 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400500 """Stage a build on a devserver and return the update_url.
501
502 @param image_name: a name like lumpy-release/R27-3837.0.0
503 @returns an update URL like:
504 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
505 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700506 if not image_name:
507 image_name = self.get_repair_image_name()
508 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400509 devserver = dev_server.ImageServer.resolve(image_name)
510 devserver.trigger_download(image_name, synchronous=False)
511 return tools.image_url_pattern() % (devserver.url(), image_name)
512
513
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700514 def stage_image_for_servo(self, image_name=None):
515 """Stage a build on a devserver and return the update_url.
516
517 @param image_name: a name like lumpy-release/R27-3837.0.0
518 @returns an update URL like:
519 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
520 """
521 if not image_name:
522 image_name = self.get_repair_image_name()
523 logging.info('Staging build for servo install: %s', image_name)
524 devserver = dev_server.ImageServer.resolve(image_name)
525 devserver.stage_artifacts(image_name, ['test_image'])
526 return devserver.get_test_image_url(image_name)
527
528
beepse539be02013-07-31 21:57:39 -0700529 def stage_factory_image_for_servo(self, image_name):
530 """Stage a build on a devserver and return the update_url.
531
532 @param image_name: a name like <baord>/4262.204.0
beeps12c0a3c2013-09-03 11:58:27 -0700533
beepse539be02013-07-31 21:57:39 -0700534 @return: An update URL, eg:
535 http://<devserver>/static/canary-channel/\
536 <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
beeps12c0a3c2013-09-03 11:58:27 -0700537
538 @raises: ValueError if the factory artifact name is missing from
539 the config.
540
beepse539be02013-07-31 21:57:39 -0700541 """
542 if not image_name:
543 logging.error('Need an image_name to stage a factory image.')
544 return
545
beeps12c0a3c2013-09-03 11:58:27 -0700546 factory_artifact = global_config.global_config.get_config_value(
547 'CROS', 'factory_artifact', type=str, default='')
548 if not factory_artifact:
549 raise ValueError('Cannot retrieve the factory artifact name from '
550 'autotest config, and hence cannot stage factory '
551 'artifacts.')
552
beepse539be02013-07-31 21:57:39 -0700553 logging.info('Staging build for servo install: %s', image_name)
554 devserver = dev_server.ImageServer.resolve(image_name)
555 devserver.stage_artifacts(
556 image_name,
beeps12c0a3c2013-09-03 11:58:27 -0700557 [factory_artifact],
558 archive_url=None)
beepse539be02013-07-31 21:57:39 -0700559
560 return tools.factory_image_url_pattern() % (devserver.url(), image_name)
561
562
Chris Sosaa3ac2152012-05-23 22:23:13 -0700563 def machine_install(self, update_url=None, force_update=False,
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500564 local_devserver=False, repair=False):
565 """Install the DUT.
566
Dan Shi0f466e82013-02-22 15:44:58 -0800567 Use stateful update if the DUT is already running the same build.
568 Stateful update does not update kernel and tends to run much faster
569 than a full reimage. If the DUT is running a different build, or it
570 failed to do a stateful update, full update, including kernel update,
571 will be applied to the DUT.
572
Scott Zawalskieadbf702013-03-14 09:23:06 -0400573 Once a host enters machine_install its cros_version label will be
574 removed as well as its host attribute job_repo_url (used for
575 package install).
576
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500577 @param update_url: The url to use for the update
578 pattern: http://$devserver:###/update/$build
579 If update_url is None and repair is True we will install the
580 stable image listed in global_config under
581 CROS.stable_cros_version.
582 @param force_update: Force an update even if the version installed
583 is the same. Default:False
584 @param local_devserver: Used by run_remote_test to allow people to
585 use their local devserver. Default: False
586 @param repair: Whether or not we are in repair mode. This adds special
587 cases for repairing a machine like starting update_engine.
588 Setting repair to True sets force_update to True as well.
589 default: False
590 @raises autoupdater.ChromiumOSError
591
592 """
Dan Shi7458bf62013-06-10 12:50:16 -0700593 if update_url:
594 logging.debug('update url is set to %s', update_url)
595 else:
596 logging.debug('update url is not set, resolving...')
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700597 if self._parser.options.image:
598 requested_build = self._parser.options.image
599 if requested_build.startswith('http://'):
600 update_url = requested_build
Dan Shi7458bf62013-06-10 12:50:16 -0700601 logging.debug('update url is retrieved from requested_build'
602 ': %s', update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700603 else:
604 # Try to stage any build that does not start with
605 # http:// on the devservers defined in
606 # global_config.ini.
Dan Shi7458bf62013-06-10 12:50:16 -0700607 update_url = self._stage_image_for_update(requested_build)
608 logging.debug('Build staged, and update_url is set to: %s',
609 update_url)
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700610 elif repair:
611 update_url = self._stage_image_for_update()
Dan Shi7458bf62013-06-10 12:50:16 -0700612 logging.debug('Build staged, and update_url is set to: %s',
613 update_url)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400614 else:
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700615 raise autoupdater.ChromiumOSError(
616 'Update failed. No update URL provided.')
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500617
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500618 if repair:
Dan Shi0f466e82013-02-22 15:44:58 -0800619 # In case the system is in a bad state, we always reboot the machine
620 # before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700621 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500622 self.run('stop update-engine; start update-engine')
623 force_update = True
Dan Shi0f466e82013-02-22 15:44:58 -0800624
Chris Sosaa3ac2152012-05-23 22:23:13 -0700625 updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
Chris Sosa72312602013-04-16 15:01:56 -0700626 local_devserver=local_devserver)
Dan Shi0f466e82013-02-22 15:44:58 -0800627 updated = False
Scott Zawalskieadbf702013-03-14 09:23:06 -0400628 # Remove cros-version and job_repo_url host attribute from host.
629 self.clear_cros_version_labels_and_job_repo_url()
Dan Shi0f466e82013-02-22 15:44:58 -0800630 # If the DUT is already running the same build, try stateful update
631 # first. Stateful update does not update kernel and tends to run much
632 # faster than a full reimage.
633 try:
Chris Sosab76e0ee2013-05-22 16:55:41 -0700634 updated = self._try_stateful_update(
635 update_url, force_update, updater)
Dan Shi0f466e82013-02-22 15:44:58 -0800636 if updated:
637 logging.info('DUT is updated with stateful update.')
638 except Exception as e:
639 logging.exception(e)
640 logging.warn('Failed to stateful update DUT, force to update.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700641
Dan Shi0f466e82013-02-22 15:44:58 -0800642 inactive_kernel = None
643 # Do a full update if stateful update is not applicable or failed.
644 if not updated:
645 # In case the system is in a bad state, we always reboot the
646 # machine before machine_install.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700647 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Chris Sosab7612bc2013-03-21 10:32:37 -0700648
649 # TODO(sosa): Remove temporary hack to get rid of bricked machines
650 # that can't update due to a corrupted policy.
651 self.run('rm -rf /var/lib/whitelist')
652 self.run('touch /var/lib/whitelist')
653 self.run('chmod -w /var/lib/whitelist')
Scott Zawalskib550d5a2013-03-22 09:23:59 -0400654 self.run('stop update-engine; start update-engine')
Chris Sosab7612bc2013-03-21 10:32:37 -0700655
Dan Shi0f466e82013-02-22 15:44:58 -0800656 if updater.run_update(force_update):
657 updated = True
658 # Figure out active and inactive kernel.
659 active_kernel, inactive_kernel = updater.get_kernel_state()
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700660
Dan Shi0f466e82013-02-22 15:44:58 -0800661 # Ensure inactive kernel has higher priority than active.
662 if (updater.get_kernel_priority(inactive_kernel)
663 < updater.get_kernel_priority(active_kernel)):
664 raise autoupdater.ChromiumOSError(
665 'Update failed. The priority of the inactive kernel'
666 ' partition is less than that of the active kernel'
667 ' partition.')
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700668
Dan Shi0f466e82013-02-22 15:44:58 -0800669 # Updater has returned successfully; reboot the host.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700670 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700671
Dan Shi0f466e82013-02-22 15:44:58 -0800672 if updated:
673 self._post_update_processing(updater, inactive_kernel)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400674 image_name = autoupdater.url_to_image_name(update_url)
Dan Shie9309262013-06-19 22:50:21 -0700675 self.add_cros_version_labels_and_job_repo_url(image_name)
Simran Basi13fa1ba2013-03-04 10:56:47 -0800676
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700677 # Clean up any old autotest directories which may be lying around.
678 for path in global_config.global_config.get_config_value(
679 'AUTOSERV', 'client_autodir_paths', type=list):
680 self.run('rm -rf ' + path)
681
682
def show_update_engine_log(self):
    """Dump the update engine log.

    Runs `cat` on the file named by constants.UPDATE_ENGINE_LOG through
    self.run().
    """
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
687
688
def _get_board_from_afe(self):
    """Look up this host's board from its labels in the AFE.

    The lookup is delegated to server_utils.get_board_from_afe().
    A host label of the form "board:<board>" supplies the value;
    the "<board>" part is what gets returned. `None` is returned
    when there is not a single, unique label matching the pattern.

    @returns board from label, or `None`.
    """
    hostname = self.hostname
    afe = self._AFE
    return server_utils.get_board_from_afe(hostname, afe)
Simran Basi833814b2013-01-29 13:13:43 -0800699
700
def get_build(self):
    """Fetch the current build for this Host from the AFE.

    The lookup is delegated to server_utils.get_build_from_afe(),
    which examines this host's labels in the AFE.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    hostname = self.hostname
    afe = self._AFE
    return server_utils.get_build_from_afe(hostname, afe)
Richard Barnette82c35912012-11-20 10:09:10 -0800710
711
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    When the host is up, re-install the DUT by calling
    machine_install() in repair mode. The original notes say this
    installs a stable "repair" version of Chrome OS as defined in the
    global_config under CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the traceback, then let the caller see the failure.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500733
734
def _install_repair_with_powerwash(self):
    """Powerwash the DUT, then repair it using update-engine.

    update-engine may fail due to a bad image. In such a case, a
    powerwash may help to clean up the DUT so that update-engine works
    again. The powerwash is requested by writing "fast safe" to
    /mnt/stateful_partition/factory_install_reset and rebooting; after
    a successful reboot, _install_repair() performs the actual install.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises AutoservRepairFailure if the DUT is not up after the
            powerwash reboot.
    @raises ChromiumOSError if the install failed for some reason.

    """
    if not self.is_up():
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to powerwash the DUT.')
    request_powerwash_cmd = ('echo "fast safe" > '
                             '/mnt/stateful_partition/factory_install_reset')
    self.run(request_powerwash_cmd)
    self.reboot(timeout=self.POWERWASH_BOOT_TIMEOUT, wait=True)

    came_back = self.is_up()
    if not came_back:
        logging.error('Powerwash failed. DUT did not come back after '
                      'reboot.')
        failure_msg = ('DUT failed to boot from powerwash after %d seconds' %
                       self.POWERWASH_BOOT_TIMEOUT)
        raise error.AutoservRepairFailure(failure_msg)

    logging.info('Powerwash succeeded.')
    self._install_repair()
761
762
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """Re-install the OS on the DUT through the attached Servo.

    The procedure is:
    1) install a test image on a USB storage device attached to the
       Servo board,
    2) boot that image in recovery mode, and then
    3) install the image with chromeos-install.

    The USB boot phase and the install phase are each timed with a
    stats.Timer keyed on the corresponding timeout value.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the installed image fails to boot.
    """
    # Phase 1: stage the image on USB and boot the DUT from it.
    boot_stat_key = 'servo_install.usb_boot_timeout_%s' % usb_boot_timeout
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = stats.Timer(boot_stat_key)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    # Phase 2: install the image onto the DUT.
    install_stat_key = 'servo_install.install_timeout_%s' % install_timeout
    install_timer = stats.Timer(install_stat_key)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes', timeout=install_timeout)
    install_timer.stop()

    # Phase 3: power cycle so the DUT boots the freshly installed image.
    logging.info('Power cycling DUT through servo.')
    servo = self.servo
    servo.power_long_press()
    servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
816
817
def _servo_repair_reinstall(self):
    """Reinstall the DUT utilizing servo and a test image.

    Stages an image with stage_image_for_servo() and hands the
    resulting URL to servo_install(), which:
    1) installs a test image on a USB storage device attached to the
       Servo board,
    2) boots that image in recovery mode, and then
    3) installs the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    has_servo = bool(self.servo)
    if not has_servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    self.servo_install(self.stage_image_for_servo())
840
841
def _servo_repair_power(self):
    """Attempt to repair DUT using an attached Servo.

    Powers the DUT off and back on through the servo's power state
    controller, then waits up to BOOT_TIMEOUT for it to come up.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.
    @raises AutoservRepairFailure if the DUT does not come back up.
    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Power NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recover servo enabled device by '
                 'powering it off and on.')
    # The controller is fetched separately for each operation.
    self.servo.get_power_state_controller().power_off()
    self.servo.get_power_state_controller().power_on()

    # NOTE(review): the message below mentions long_press although the
    # power cycle above goes through the power state controller.
    if not self.wait_up(self.BOOT_TIMEOUT):
        raise error.AutoservRepairFailure(
                'DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800863
864
def _powercycle_to_repair(self):
    """Utilize the RPM Infrastructure to bring the host back up.

    The host is power cycled and then given BOOT_TIMEOUT to come up.
    Each time it fails to come up it is power cycled again, until
    _MAX_POWER_CYCLE_ATTEMPTS failures have been seen. The original
    notes say repeated power cycling triggers auto fallback to the
    last good install.

    @raises AutoservRepairMethodNA if the device does not support remote
            power.
    @raises AutoservRepairFailure if the host never comes back online.

    """
    if not self.has_power():
        raise error.AutoservRepairMethodNA('Device does not support power.')

    logging.info('Attempting repair via RPM powercycle.')
    failures = 0
    while True:
        self.power_cycle()
        if self.wait_up(timeout=self.BOOT_TIMEOUT):
            break
        failures += 1
        if failures >= self._MAX_POWER_CYCLE_ATTEMPTS:
            raise error.AutoservRepairFailure(
                    'Powercycled host %s %d times; device did not come back'
                    ' online.' % (self.hostname, failures))

    if failures:
        logging.info('Powercycling was successful after %d failures.',
                     failures)
    else:
        logging.info('Powercycling was successful first time.')
895
896
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If a servo host is configured, a healthy servo object is created
    first; on failure the error is logged and `self.servo` is set to
    None.

    The following procedures are then attempted in order, each
    followed by `self.verify()`, until one of them succeeds:
      1. Re-install to a known stable image using auto-update.
      2. Powerwash the DUT, then re-install using auto-update.
      3. If there's a servo for the DUT, power the DUT off and on.
      4. If there's a servo for the DUT, re-install via the servo.
      5. If the DUT can be power-cycled via RPM, repair by
         power-cycling.

    The outcome of every attempt is recorded in a stats counter named
    after the repair method, plus a per-board counter when the board
    is known.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.

    """
    if self._servo_host:
        try:
            self.servo = self._servo_host.create_healthy_servo_object()
        except Exception as servo_error:
            self.servo = None
            logging.error('Could not create a healthy servo: %s',
                          servo_error)

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    strategies = (self._install_repair,
                  self._install_repair_with_powerwash,
                  self._servo_repair_power,
                  self._servo_repair_reinstall,
                  self._powercycle_to_repair)
    failure_messages = []
    board = self._get_board_from_afe()

    def _count(stat_name):
        """Bump the named counter and, if known, its per-board twin."""
        stats.Counter(stat_name).increment()
        if board:
            stats.Counter('%s.%s' % (stat_name, board)).increment()

    for strategy in strategies:
        method_name = strategy.__name__
        try:
            strategy()
            self.verify()
            _count('%s.SUCCEEDED' % method_name)
            return
        except error.AutoservRepairMethodNA as e:
            _count('%s.RepairNA' % method_name)
            logging.warn('Repair function NA: %s', e)
            failure_messages.append(str(e))
        except Exception as e:
            _count('%s.FAILED' % method_name)
            logging.warn('Failed to repair device: %s', e)
            failure_messages.append(str(e))

    # Every strategy was either not applicable or failed.
    _count('Full_Repair_Failed')
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failure_messages))
Richard Barnette82c35912012-11-20 10:09:10 -0800985
986
def close(self):
    """Disconnect all rpc connections, then close via the parent class."""
    self.rpc_disconnect_all()
    parent = super(CrosHost, self)
    parent.close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700990
991
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Nothing is done unless the AFE knows this host and the host carries
    the _RPM_OUTLET_CHANGED attribute. Otherwise the host is powered on
    and the attribute is reset to None in the AFE.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    # The marker attribute is cleared whether or not power_on() worked.
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
1010
1011
def _is_factory_image(self):
    """Checks if the image on the DUT is a factory image.

    The check tests for the file /root/.factory_test on the DUT; a
    non-zero exit status is tolerated and simply means "not factory".

    @return: True if the image on the DUT is a factory image.
             False otherwise.
    """
    marker_test = '[ -f /root/.factory_test ]'
    return self.run(marker_test, ignore_status=True).exit_status == 0
1020
1021
def _restart_ui(self):
    """Restarts ui.

    Clears the login prompt state through the client-side cros_ui
    module, runs 'restart ui' on the DUT, then waits for the login
    prompt through the same module.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client = autotest.Autotest(self)
    client.run_static_method(cros_ui_module, '_clear_login_prompt_state')
    self.run('restart ui')
    client.run_static_method(cros_ui_module, '_wait_for_login_prompt')
1040
1041
def cleanup(self):
    """Clean up the DUT after a test.

    Removes the file named by constants.CLEANUP_LOGS_PAUSED_FILE, then
    tries to restart the ui. If the ui cannot be restarted, the parent
    class cleanup is used instead. Finally, hosts that report
    has_power() get their power state checked via _cleanup_poweron().
    """
    remove_paused_marker = 'rm -f %s' % constants.CLEANUP_LOGS_PAUSED_FILE
    self.run(remove_paused_marker)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warn('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        parent = super(CrosHost, self)
        parent.cleanup()
    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001055
1056
def reboot(self, **dargs):
    """Reboot the site host.

    The more generic RemoteHost.reboot() performs sync and sleeps for 5
    seconds. This is not necessary for Chrome OS devices as the sync
    should be finished in a short time during the reboot command.

    @param dargs: Keyword arguments forwarded to the parent reboot().
            'reboot_cmd' and 'fastsync' are filled in with Chrome OS
            defaults only when the caller did not supply them.
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001072
1073
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. update_engine answers a simple status request (skipped on
        factory images).
     5. python on the DUT can import cPickle.

    """
    super(CrosHost, self).verify_software()

    # (path, config key in the SERVER section, default GB required)
    diskspace_checks = (
        ('/mnt/stateful_partition',
         'gb_diskspace_required', 20.0),
        ('/mnt/stateful_partition/encrypted',
         'gb_encrypted_diskspace_required', 0.1),
    )
    for path, config_key, default_gb in diskspace_checks:
        required_gb = global_config.global_config.get_config_value(
                'SERVER', config_key, type=float, default=default_gb)
        self.check_diskspace(path, required_gb)

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    running_factory_image = self._is_factory_image()
    if not running_factory_image:
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001105
1106
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
      connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
      180 seconds. In conjunction with ServerAliveCountMax ensures
      that if the connection dies, Autotest will bail out quickly.
      Originally tried 60 secs, but saw frequent job ABORTS where
      the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
      consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
      Host keys change with every new installation, don't waste
      memory/space saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    The instance's _ssh_verbosity_flag and _ssh_options are placed
    ahead of `opts` on the command line.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @returns The ssh command line as a string, without a target host.
    """
    fixed_options = ('StrictHostKeyChecking=no',
                     'UserKnownHostsFile=/dev/null',
                     'BatchMode=yes',
                     'ConnectTimeout=30',
                     'ServerAliveInterval=180',
                     'ServerAliveCountMax=3',
                     'ConnectionAttempts=4',
                     'Protocol=2')
    option_flags = ''.join([' -o ' + option for option in fixed_options])
    template = '/usr/bin/ssh -a -x %s %s %s' + option_flags + ' -l %s -p %d'
    substitutions = (self._ssh_verbosity_flag, self._ssh_options,
                     opts, user, port)
    return template % substitutions
Fang Deng96667ca2013-08-01 17:46:18 -07001147
1148
def _create_ssh_tunnel(self, port, local_port):
    """Create an ssh tunnel from local_port to port.

    The tunnel is an ssh process started in the background with
    subprocess.Popen; it forwards local_port on this machine to
    localhost:port as seen from the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process.
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    full_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding_opts),
                          self.hostname)
    logging.debug('Full tunnel command: %s', full_cmd)
    tunnel = subprocess.Popen(full_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel.pid)
    return tunnel
1171
1172
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    Any existing connection for `port` is disconnected first, then a
    new ssh tunnel is opened from an unused local port and recorded in
    _rpc_proxy_map under the remote port.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Pid of the remote server process, if known; it
                       is stored alongside the tunnel process.

    @return A url that we can use to initiate the rpc connection.
    """
    self.rpc_disconnect(port)
    forwarded_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarded_port)
    bookkeeping_entry = (command_name, tunnel, remote_pid)
    self._rpc_proxy_map[port] = bookkeeping_entry
    return self._RPC_PROXY_URL % forwarded_port
1208
1209
def xmlrpc_connect(self, command, port, command_name=None,
                   ready_test_name=None, timeout_seconds=10):
    """Connect to an XMLRPC server on the host.

    The `command` argument should be a simple shell command that
    starts an XMLRPC server on the given `port`.  The command
    must not daemonize, and must terminate cleanly on SIGTERM.
    The command is started in the background on the host, and a
    local XMLRPC client for the server is created and returned
    to the caller.

    Note that the process of creating an XMLRPC client makes no
    attempt to connect to the remote server; the caller is
    responsible for determining whether the server is running
    correctly, and is ready to serve requests.

    Optionally, the caller can pass ready_test_name, a string
    containing the name of a method to call on the proxy.  This
    method should take no parameters and return successfully only
    when the server is ready to process client requests.  When
    ready_test_name is set, xmlrpc_connect will block until the
    proxy is ready; if the server is not ready within
    timeout_seconds, the connection is torn down with
    rpc_disconnect() and the error from the retry wrapper
    propagates to the caller.

    If a server is already running on the remote port, this
    method will kill it and disconnect the tunnel process
    associated with the connection before establishing a new one,
    by consulting the rpc_proxy_map in rpc_disconnect.

    @param command Shell command to start the server.
    @param port Port number on which the server is expected to
                be serving.
    @param command_name String to use as input to `pkill` to
        terminate the XMLRPC server on the host.
    @param ready_test_name String containing the name of a
        method defined on the XMLRPC server.
    @param timeout_seconds Number of seconds to wait
        for the server to become 'ready.'

    @returns An xmlrpclib.ServerProxy for the server.

    """
    # Clean up any existing state.  If the caller is willing
    # to believe their server is down, we ought to clean up
    # any tunnels we might have sitting around.
    self.rpc_disconnect(port)
    # Start the server on the host.  Redirection in the command
    # below is necessary, because 'ssh' won't terminate until
    # background child processes close stdin, stdout, and
    # stderr.
    launch_cmd = '%s </dev/null >/dev/null 2>&1 & echo $!' % command
    launch_result = self.run(launch_cmd)
    remote_pid = launch_result.stdout.rstrip('\n')
    logging.debug('Started XMLRPC server on host %s, pid = %s',
                  self.hostname, remote_pid)

    # Tunnel through SSH to be able to reach that remote port.
    tunnel_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
    proxy = xmlrpclib.ServerProxy(tunnel_url, allow_none=True)

    if ready_test_name is not None:
        # retry.retry logs each attempt; calculate delay_sec to
        # keep log spam to a dull roar.
        retriable_errors = (socket.error,
                            xmlrpclib.ProtocolError,
                            httplib.BadStatusLine)
        retry_timeout_min = timeout_seconds / 60.0
        retry_delay_sec = min(max(timeout_seconds / 20.0, 0.1), 1)

        @retry.retry(retriable_errors,
                     timeout_min=retry_timeout_min,
                     delay_sec=retry_delay_sec)
        def ready_test():
            """ Call proxy.ready_test_name(). """
            getattr(proxy, ready_test_name)()

        server_ready = False
        try:
            logging.info('Waiting %d seconds for XMLRPC server '
                         'to start.', timeout_seconds)
            ready_test()
            server_ready = True
        finally:
            # Tear the connection down on any failure, then let the
            # original exception continue to the caller.
            if not server_ready:
                logging.error('Failed to start XMLRPC server.')
                self.rpc_disconnect(port)
    logging.info('XMLRPC server started successfully.')
    return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001291
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001292
def syslog(self, message, tag='autotest'):
    """Logs a message to syslog on host.

    Both `tag` and `message` are handed to `logger` as single-quoted
    shell words, so quotes, `$`, backticks and other shell
    metacharacters they contain are logged literally instead of being
    interpreted by the shell that executes the command.

    @param message String message to log into syslog
    @param tag String tag prefix for syslog

    """
    def _quote(word):
        """Return `word` wrapped as one literal POSIX shell word."""
        # Inside single quotes nothing is special except the single
        # quote itself; close the quotes, emit a double-quoted "'",
        # and reopen them.
        return "'" + ('%s' % (word,)).replace("'", "'\"'\"'") + "'"

    self.run('logger -t %s %s' % (_quote(tag), _quote(message)))
1301
1302
def jsonrpc_connect(self, port):
    """Return a jsonrpc client proxy reached through an ssh tunnel.

    This method exists to facilitate communication with goofy (which is
    the default system manager on all factory images) and as such, leaves
    most of the rpc server sanity checking to the caller. Unlike
    xmlrpc_connect, no remote server is started here: the only clients
    of this code are factory tests, for which the goofy system manager
    is built in to the image and starts when the target boots.

    Several jsonrpc proxies could in theory be forwarded to the same
    remote port as long as an rpc server is listening there, but that
    risks leaking an existing tunnel process, so older tunnels are
    always disconnected through rpc_disconnect.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None when jsonrpclib is unavailable.
    """
    if not jsonrpclib:
        logging.warning('Jsonrpclib could not be imported. Check that '
                        'site-packages contains jsonrpclib.')
        return None

    # No command name is passed: there is no remote server process for
    # rpc_disconnect to kill later, only the tunnel.
    rpc_url = self._setup_rpc(port, None)
    client = jsonrpclib.jsonrpc.ServerProxy(rpc_url)

    logging.info('Established a jsonrpc connection through port %s.', port)
    return client
1333
1334
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Terminates the remote RPC server previously started for
    the given `port`.  Also closes the local ssh tunnel created
    for the connection to the host.  This function does not
    directly alter the state of a previously returned RPC
    client object; however disconnection will cause all
    subsequent calls to methods on the object to fail.

    This function does nothing if requested to disconnect a port
    that was not previously connected via _setup_rpc.

    The local tunnel is torn down and the port is forgotten even
    when stopping the remote server fails, so a failed shutdown
    does not leak the tunnel process.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    @exception TestError The remote server was still running
                         after _RPC_SHUTDOWN_TIMEOUT_SECONDS.
    """
    if port not in self._rpc_proxy_map:
        return
    remote_name, tunnel_proc, remote_pid = self._rpc_proxy_map[port]
    try:
        if remote_name:
            # We use 'pkill' to find our target process rather than
            # a PID, because the host may have rebooted since
            # connecting, and we don't want to kill an innocent
            # process with the same PID.
            #
            # 'pkill' helpfully exits with status 1 if no target
            # process is found, for which run() will throw an
            # exception.  We don't want that, so we ignore the
            # status.
            self.run("pkill -f '%s'" % remote_name, ignore_status=True)
            if remote_pid:
                logging.info('Waiting for RPC server "%s" shutdown',
                             remote_name)
                start_time = time.time()
                while (time.time() - start_time <
                       self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                    running_processes = self.run(
                            "pgrep -f '%s'" % remote_name,
                            ignore_status=True).stdout.split()
                    if remote_pid not in running_processes:
                        logging.info('Shut down RPC server.')
                        break
                    time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)
                else:
                    # The loop ran out of time without seeing the
                    # server's pid disappear.
                    raise error.TestError('Failed to shutdown RPC server %s' %
                                          remote_name)
    finally:
        # Runs on success and on failure alike: the tunnel is a local
        # child process that nothing else will ever clean up.
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)
        self._rpc_proxy_map.pop(port, None)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001389
1390
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports."""
    # rpc_disconnect() removes entries from _rpc_proxy_map, so walk
    # an explicit snapshot of the ports rather than the live dict.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001395
1396
def _ping_check_status(self, status):
    """Ping the host a single time and compare against `status`.

    A ping result of 0 is treated as "host is up".

    @param status Check the ping status against this value.
    @return True iff `status` and the result of ping are the same
            (i.e. both True or both False).

    """
    ping_result = utils.ping(self.hostname, tries=1, deadline=1)
    host_is_up = (ping_result == 0)
    # XOR is truthy exactly when the two values disagree.
    disagree = status ^ host_is_up
    return not disagree
1407
1408 def _ping_wait_for_status(self, status, timeout):
1409 """Wait for the host to have a given status (UP or DOWN).
1410
1411 Status is checked by polling. Polling will not last longer
1412 than the number of seconds in `timeout`. The polling
1413 interval will be long enough that only approximately
1414 _PING_WAIT_COUNT polling cycles will be executed, subject
1415 to a maximum interval of about one minute.
1416
1417 @param status Waiting will stop immediately if `ping` of the
1418 host returns this status.
1419 @param timeout Poll for at most this many seconds.
1420 @return True iff the host status from `ping` matched the
1421 requested status at the time of return.
1422
1423 """
1424 # _ping_check_status() takes about 1 second, hence the
1425 # "- 1" in the formula below.
1426 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1427 end_time = time.time() + timeout
1428 while time.time() <= end_time:
1429 if self._ping_check_status(status):
1430 return True
1431 if poll_interval > 0:
1432 time.sleep(poll_interval)
1433
1434 # The last thing we did was sleep(poll_interval), so it may
1435 # have been too long since the last `ping`. Check one more
1436 # time, just to be sure.
1437 return self._ping_check_status(status)
1438
def ping_wait_up(self, timeout):
    """Wait for the host to respond to `ping`.

    N.B.  This method is not a reliable substitute for
    `wait_up()`, because a host that responds to ping will not
    necessarily respond to ssh.  Use it only when the target DUT
    can be considered functional even if it can't be reached via
    ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001454
def ping_wait_down(self, timeout):
    """Wait until the host no longer responds to `ping`.

    This function can be used as a slightly faster version of
    `wait_down()`, by avoiding potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001468
1469 def test_wait_for_sleep(self):
1470 """Wait for the client to enter low-power sleep mode.
1471
1472 The test for "is asleep" can't distinguish a system that is
1473 powered off; to confirm that the unit was asleep, it is
1474 necessary to force resume, and then call
1475 `test_wait_for_resume()`.
1476
1477 This function is expected to be called from a test as part
1478 of a sequence like the following:
1479
1480 ~~~~~~~~
1481 boot_id = host.get_boot_id()
1482 # trigger sleep on the host
1483 host.test_wait_for_sleep()
1484 # trigger resume on the host
1485 host.test_wait_for_resume(boot_id)
1486 ~~~~~~~~
1487
1488 @exception TestFail The host did not go to sleep within
1489 the allowed time.
1490 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001491 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001492 raise error.TestFail(
1493 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001494 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001495
1496
1497 def test_wait_for_resume(self, old_boot_id):
1498 """Wait for the client to resume from low-power sleep mode.
1499
1500 The `old_boot_id` parameter should be the value from
1501 `get_boot_id()` obtained prior to entering sleep mode. A
1502 `TestFail` exception is raised if the boot id changes.
1503
1504 See @ref test_wait_for_sleep for more on this function's
1505 usage.
1506
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001507 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001508 target host went to sleep.
1509
1510 @exception TestFail The host did not respond within the
1511 allowed time.
1512 @exception TestFail The host responded, but the boot id test
1513 indicated a reboot rather than a sleep
1514 cycle.
1515 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001516 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001517 raise error.TestFail(
1518 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001519 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001520 else:
1521 new_boot_id = self.get_boot_id()
1522 if new_boot_id != old_boot_id:
1523 raise error.TestFail(
1524 'client rebooted, but sleep was expected'
1525 ' (old boot %s, new boot %s)'
1526 % (old_boot_id, new_boot_id))
1527
1528
1529 def test_wait_for_shutdown(self):
1530 """Wait for the client to shut down.
1531
1532 The test for "has shut down" can't distinguish a system that
1533 is merely asleep; to confirm that the unit was down, it is
1534 necessary to force boot, and then call test_wait_for_boot().
1535
1536 This function is expected to be called from a test as part
1537 of a sequence like the following:
1538
1539 ~~~~~~~~
1540 boot_id = host.get_boot_id()
1541 # trigger shutdown on the host
1542 host.test_wait_for_shutdown()
1543 # trigger boot on the host
1544 host.test_wait_for_boot(boot_id)
1545 ~~~~~~~~
1546
1547 @exception TestFail The host did not shut down within the
1548 allowed time.
1549 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001550 if not self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001551 raise error.TestFail(
1552 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001553 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001554
1555
1556 def test_wait_for_boot(self, old_boot_id=None):
1557 """Wait for the client to boot from cold power.
1558
1559 The `old_boot_id` parameter should be the value from
1560 `get_boot_id()` obtained prior to shutting down. A
1561 `TestFail` exception is raised if the boot id does not
1562 change. The boot id test is omitted if `old_boot_id` is not
1563 specified.
1564
1565 See @ref test_wait_for_shutdown for more on this function's
1566 usage.
1567
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001568 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001569 shut down.
1570
1571 @exception TestFail The host did not respond within the
1572 allowed time.
1573 @exception TestFail The host responded, but the boot id test
1574 indicated that there was no reboot.
1575 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001576 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001577 raise error.TestFail(
1578 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001579 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001580 elif old_boot_id:
1581 if self.get_boot_id() == old_boot_id:
1582 raise error.TestFail(
1583 'client is back up, but did not reboot'
1584 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001585
1586
@staticmethod
def check_for_rpm_support(hostname):
    """Report whether a hostname looks like an RPM-powered lab DUT.

    The hostname is matched against CrosHost._RPM_HOSTNAME_REGEX.

    @param hostname: hostname to check for rpm support.

    @return None if this host does not follow the defined naming format
            for RPM powered DUT's in the lab. If it does follow the format,
            it returns a regular expression MatchObject instead.
    """
    rpm_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001598
1599
def has_power(self):
    """For this host, return whether or not it is powered by an RPM.

    @return The result of check_for_rpm_support() for this host's
            hostname: a (truthy) regular expression MatchObject if the
            hostname follows the lab's RPM naming format, None
            otherwise.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07001607
1608
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001609 def _set_power(self, state, power_method):
1610 """Sets the power to the host via RPM, Servo or manual.
1611
1612 @param state Specifies which power state to set to DUT
1613 @param power_method Specifies which method of power control to
1614 use. By default "RPM" will be used. Valid values
1615 are the strings "RPM", "manual", "servoj10".
1616
1617 """
1618 ACCEPTABLE_STATES = ['ON', 'OFF']
1619
1620 if state.upper() not in ACCEPTABLE_STATES:
1621 raise error.TestError('State must be one of: %s.'
1622 % (ACCEPTABLE_STATES,))
1623
1624 if power_method == self.POWER_CONTROL_SERVO:
1625 logging.info('Setting servo port J10 to %s', state)
1626 self.servo.set('prtctl3_pwren', state.lower())
1627 time.sleep(self._USB_POWER_TIMEOUT)
1628 elif power_method == self.POWER_CONTROL_MANUAL:
1629 logging.info('You have %d seconds to set the AC power to %s.',
1630 self._POWER_CYCLE_TIMEOUT, state)
1631 time.sleep(self._POWER_CYCLE_TIMEOUT)
1632 else:
1633 if not self.has_power():
1634 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001635 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1636 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1637 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07001638 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07001639
1640
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Delegates to _set_power() with the 'OFF' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'OFF'
    self._set_power(target_state, power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001650
1651
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Delegates to _set_power() with the 'ON' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    target_state = 'ON'
    self._set_power(target_state, power_method)
1661
1662
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For the servo and manual methods the cycle is performed here as
    power_off(), a pause of _POWER_CYCLE_TIMEOUT seconds, then
    power_on().  For any other method a single 'CYCLE' request is sent
    through rpm_client.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    stepwise_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in stepwise_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001678
1679
def get_platform(self):
    """Determine the correct platform label for this host.

    The label is derived from the firmware id reported by crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally follows the format {platform}.{firmware version},
    # e.g. Alex.X.YYY.Z or Google_Alex.X.YYY.Z; only the part before
    # the first '.' identifies the platform.
    fwid = crossystem.fwid()
    platform_id = fwid.split('.')[0]
    # Newer platforms carry a 'Google_' prefix that older ones lack;
    # drop it so both spell the platform the same way.
    return platform_id.lower().replace('google_', '')
1693
1694
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    The board is read from CHROMEOS_RELEASE_BOARD in the host's
    /etc/lsb-release and prefixed with ds_constants.BOARD_PREFIX.

    @returns a string representing this host's board.
    """
    release_info = utils.parse_cmd_output('cat /etc/lsb-release',
                                          run_method=self.run)
    board = release_info['CHROMEOS_RELEASE_BOARD']
    board_format_string = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, but development
    # devices report {board_name}-signed-{key_type}.  Keep only the
    # leading component, or the leading two when the name contains
    # 'x86' (e.g. an 'x86-' prefix that must be preserved).
    pieces = board.split('-')
    kept = 2 if 'x86' in board else 1
    return board_format_string % '-'.join(pieces[:kept])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001711
1712
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host should get the lightsensor label.

    Searches _LIGHTSENSOR_SEARCH_DIR on the host (following symlinks,
    at most 4 levels deep) for any of the names in _LIGHTSENSOR_FILES.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        self._LIGHTSENSOR_SEARCH_DIR, '|'.join(self._LIGHTSENSOR_FILES))
    try:
        # stderr_tee is None because following symlinks can hit a
        # symlink loop; the command still executes correctly and only
        # prints a few messages to stderr.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exited with a return code of 1 meaning none of the
        # possible lightsensor files existed.
        return None
    return 'lightsensor'
1732
1733
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host should get the bluetooth label.

    Checks on the host for the directory /sys/class/bluetooth/hci0.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # test exited with a return code 1 meaning the directory did
        # not exist.
        return None
    # test exited with a return code of 0.
    return 'bluetooth'
1749
1750
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the correct graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    # The decision is made purely from the lower-cased `uname -a` text.
    kernel_info = self.run('uname -a').stdout.lower()
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
1766
1767
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to retrieve all the automatic labels for a host:
    every function in _LABEL_FUNCTIONS is called once with this host, in
    order, and each truthy result is collected.
    """
    candidates = (detect(self) for detect in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]