blob: 86c4fee147ebcb7b4d65b571116b6c938ad3b5ed [file] [log] [blame]
J. Richard Barnette24adbf42012-04-11 15:04:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Dale Curtisaa5eedb2011-08-23 16:18:52 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Aviv Keshet74c89a92013-02-04 15:18:30 -08005import functools
Christopher Wiley0ed712b2013-04-09 15:25:12 -07006import httplib
J. Richard Barnette1d78b012012-05-15 13:56:30 -07007import logging
Dan Shi0f466e82013-02-22 15:44:58 -08008import os
Simran Basid5e5e272012-09-24 15:23:59 -07009import re
Christopher Wileyd78249a2013-03-01 13:05:31 -080010import socket
J. Richard Barnette1d78b012012-05-15 13:56:30 -070011import subprocess
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070012import time
J. Richard Barnette1d78b012012-05-15 13:56:30 -070013import xmlrpclib
J. Richard Barnette134ec2c2012-04-25 12:59:37 -070014
J. Richard Barnette45e93de2012-04-11 17:24:15 -070015from autotest_lib.client.bin import utils
Richard Barnette0c73ffc2012-11-19 15:21:18 -080016from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
beeps687243d2013-07-18 15:29:27 -070018from autotest_lib.client.common_lib import site_utils
J. Richard Barnette45e93de2012-04-11 17:24:15 -070019from autotest_lib.client.common_lib.cros import autoupdater
Richard Barnette03a0c132012-11-05 12:40:35 -080020from autotest_lib.client.common_lib.cros import dev_server
Christopher Wileyd78249a2013-03-01 13:05:31 -080021from autotest_lib.client.common_lib.cros import retry
Richard Barnette82c35912012-11-20 10:09:10 -080022from autotest_lib.client.cros import constants
J. Richard Barnette45e93de2012-04-11 17:24:15 -070023from autotest_lib.server import autoserv_parser
Chris Sosaf4d43ff2012-10-30 11:21:05 -070024from autotest_lib.server import autotest
Dan Shia1ecd5c2013-06-06 11:21:31 -070025from autotest_lib.server import utils as server_utils
Scott Zawalski89c44dd2013-02-26 09:28:02 -050026from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
Simran Basi5e6339a2013-03-21 11:34:32 -070027from autotest_lib.server.cros.dynamic_suite import tools, frontend_wrappers
Fang Deng96667ca2013-08-01 17:46:18 -070028from autotest_lib.server.hosts import abstract_ssh
Fang Deng5d518f42013-08-02 14:04:32 -070029from autotest_lib.server.hosts import servo_host
beeps687243d2013-07-18 15:29:27 -070030from autotest_lib.site_utils.graphite import stats
Simran Basidcff4252012-11-20 16:13:20 -080031from autotest_lib.site_utils.rpm_control_system import rpm_client
Simran Basid5e5e272012-09-24 15:23:59 -070032
33
beeps32a63082013-08-22 14:02:29 -070034try:
35 import jsonrpclib
36except ImportError:
37 jsonrpclib = None
Fang Deng96667ca2013-08-01 17:46:18 -070038
Fang Dengd1c2b732013-08-20 12:59:46 -070039
class FactoryImageCheckerException(error.AutoservError):
    """Signals that the image being checked is a factory image."""
43
44
def add_label_detector(label_function_list, label_list=None, label=None):
    """Build a decorator that registers label detecting functions.

    The decorator appends the decorated function to `label_function_list`
    and hands the function back unchanged.  If a non-empty `label` was
    supplied and `label_list` is not None, the label is appended to
    `label_list` as well.

    @param label_function_list: List that collects the detector functions.
    @param label_list: List that collects the detectable label strings.
                       (Default: None)
    @param label: Label string that the decorated function can detect.
                  (Default: None)

    @returns The decorator to apply to a detector function.
    """
    def register(detector):
        """
        @param detector: The function to be recorded as a detector.

        @returns `detector` itself, so the decorated name is unchanged.
        """
        label_function_list.append(detector)
        if not label or label_list is None:
            return detector
        label_list.append(label)
        return detector

    return register
63
64
Fang Deng0ca40e22013-08-27 17:47:44 -070065class CrosHost(abstract_ssh.AbstractSSHHost):
J. Richard Barnette45e93de2012-04-11 17:24:15 -070066 """Chromium OS specific subclass of Host."""
67
    # Class-level handle on the autoserv option parser; machine_install()
    # reads `options.image` from it.
    _parser = autoserv_parser.autoserv_parser
    # AFE client built once when the class body runs, so it is shared by
    # every CrosHost instance.
    _AFE = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)

    # Time (in seconds) to wait for new kernel to be marked successful
    # after auto update.
    _KERNEL_UPDATE_TIMEOUT = 120

    # Timeout values (in seconds) associated with various Chrome OS
    # state changes.
    #
    # In general, a good rule of thumb is that the timeout can be up
    # to twice the typical measured value on the slowest platform.
    # The times here have not necessarily been empirically tested to
    # meet this criterion.
    #
    # SLEEP_TIMEOUT:  Time to allow for suspend to memory.
    # RESUME_TIMEOUT: Time to allow for resume after suspend, plus
    #   time to restart the network.
    # BOOT_TIMEOUT: Time to allow for boot from power off.  Among
    #   other things, this must account for the 30 second dev-mode
    #   screen delay and time to start the network.
    # USB_BOOT_TIMEOUT: Time to allow for boot from a USB device,
    #   including the 30 second dev-mode delay and time to start the
    #   network.
    # SHUTDOWN_TIMEOUT: Time to allow for shut down.
    # REBOOT_TIMEOUT: How long to wait for a reboot.
    # INSTALL_TIMEOUT: Time to allow for chromeos-install.

    SLEEP_TIMEOUT = 2
    RESUME_TIMEOUT = 10
    BOOT_TIMEOUT = 60
    USB_BOOT_TIMEOUT = 150

    # We have a long timeout to ensure we don't flakily fail due to other
    # issues. Shorter timeouts are vetted in platform_RebootAfterUpdate.
    # TODO(sbasi - crbug.com/276094) Restore to 5 mins once the 'host did not
    # return from reboot' bug is solved.
    REBOOT_TIMEOUT = 480

    INSTALL_TIMEOUT = 240

    # _USB_POWER_TIMEOUT: Time to allow for USB to power toggle ON and OFF.
    # _POWER_CYCLE_TIMEOUT: Time to allow for manual power cycle.
    _USB_POWER_TIMEOUT = 5
    _POWER_CYCLE_TIMEOUT = 10

    # Settings for RPC proxies.  The URL template takes one integer
    # (presumably a local port number -- confirm against the code that
    # formats it); the two shutdown values are, by their names, in seconds.
    _RPC_PROXY_URL = 'http://localhost:%d'
    _RPC_SHUTDOWN_POLLING_PERIOD_SECONDS = 2
    _RPC_SHUTDOWN_TIMEOUT_SECONDS = 20

    # The CROS/rpm_recovery_boards config value, split on commas into a
    # list.  It is read once, when the class body is executed.
    _RPM_RECOVERY_BOARDS = global_config.global_config.get_config_value('CROS',
            'rpm_recovery_boards', type=str).split(',')

    # NOTE(review): the users of _MAX_POWER_CYCLE_ATTEMPTS,
    # _RPM_HOSTNAME_REGEX and the _LIGHTSENSOR_* values are outside the
    # part of the file reviewed here; see those call sites for their exact
    # meaning.
    _MAX_POWER_CYCLE_ATTEMPTS = 6
    # Marker file touched on the DUT by _post_update_processing().
    _LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
    _RPM_HOSTNAME_REGEX = ('chromeos[0-9]+(-row[0-9]+)?-rack[0-9]+[a-z]*-'
                           'host[0-9]+')
    _LIGHTSENSOR_FILES = ['in_illuminance0_input',
                          'in_illuminance0_raw',
                          'illuminance0_input']
    _LIGHTSENSOR_SEARCH_DIR = '/sys/bus/iio/devices'
    # label_decorator is add_label_detector() with these two lists already
    # bound: decorating a function appends it to _LABEL_FUNCTIONS and, when
    # a label is passed, appends that label to _DETECTABLE_LABELS.
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []
    label_decorator = functools.partial(add_label_detector, _LABEL_FUNCTIONS,
                                        _DETECTABLE_LABELS)

    # Constants used in ping_wait_up() and ping_wait_down().
    #
    # _PING_WAIT_COUNT is the approximate number of polling
    # cycles to use when waiting for a host state change.
    #
    # _PING_STATUS_DOWN and _PING_STATUS_UP are names used
    # for arguments to the internal _ping_wait_for_status()
    # method.
    _PING_WAIT_COUNT = 40
    _PING_STATUS_DOWN = False
    _PING_STATUS_UP = True

    # Allowed values for the power_method argument.

    # POWER_CONTROL_RPM: Passed as default arg for power_off/on/cycle() methods.
    # POWER_CONTROL_SERVO: Used in set_power() and power_cycle() methods.
    # POWER_CONTROL_MANUAL: Used in set_power() and power_cycle() methods.
    POWER_CONTROL_RPM = 'RPM'
    POWER_CONTROL_SERVO = 'servoj10'
    POWER_CONTROL_MANUAL = 'manual'

    POWER_CONTROL_VALID_ARGS = (POWER_CONTROL_RPM,
                                POWER_CONTROL_SERVO,
                                POWER_CONTROL_MANUAL)

    # NOTE(review): used outside the part of the file reviewed here;
    # presumably a key or attribute name related to RPM outlet changes --
    # confirm at the call site.
    _RPM_OUTLET_CHANGED = 'outlet_changed'
160
beeps687243d2013-07-18 15:29:27 -0700161
J. Richard Barnette964fba02012-10-24 17:34:29 -0700162 @staticmethod
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800163 def get_servo_arguments(args_dict):
164 """Extract servo options from `args_dict` and return the result.
165
166 Take the provided dictionary of argument options and return
167 a subset that represent standard arguments needed to
168 construct a servo object for a host. The intent is to
169 provide standard argument processing from run_remote_tests
170 for tests that require a servo to operate.
171
172 Recommended usage:
173 ~~~~~~~~
174 args_dict = utils.args_to_dict(args)
Fang Deng0ca40e22013-08-27 17:47:44 -0700175 servo_args = hosts.CrosHost.get_servo_arguments(args_dict)
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800176 host = hosts.create_host(machine, servo_args=servo_args)
177 ~~~~~~~~
178
179 @param args_dict Dictionary from which to extract the servo
180 arguments.
181 """
J. Richard Barnette964fba02012-10-24 17:34:29 -0700182 servo_args = {}
183 for arg in ('servo_host', 'servo_port'):
J. Richard Barnette7214e0b2013-02-06 15:20:49 -0800184 if arg in args_dict:
185 servo_args[arg] = args_dict[arg]
J. Richard Barnette964fba02012-10-24 17:34:29 -0700186 return servo_args
J. Richard Barnette134ec2c2012-04-25 12:59:37 -0700187
J. Richard Barnette964fba02012-10-24 17:34:29 -0700188
Fang Dengd1c2b732013-08-20 12:59:46 -0700189 def _initialize(self, hostname, servo_args=None, ssh_verbosity_flag='',
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700190 ssh_options='',
Fang Dengd1c2b732013-08-20 12:59:46 -0700191 *args, **dargs):
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700192 """Initialize superclasses, and |self.servo|.
193
Fang Deng5d518f42013-08-02 14:04:32 -0700194 This method checks whether a servo is required by checking whether
195 servo_args is None. This method will only attempt to create a servo
196 object when servo is required by the test.
197
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700198 For creating the host servo object, there are three
199 possibilities: First, if the host is a lab system known to
200 have a servo board, we connect to that servo unconditionally.
201 Second, if we're called from a control file that requires
J. Richard Barnette55fb8062012-05-23 10:29:31 -0700202 servo features for testing, it will pass settings for
203 `servo_host`, `servo_port`, or both. If neither of these
204 cases apply, `self.servo` will be `None`.
J. Richard Barnette67ccb872012-04-19 16:34:56 -0700205
206 """
Fang Deng0ca40e22013-08-27 17:47:44 -0700207 super(CrosHost, self)._initialize(hostname=hostname,
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700208 *args, **dargs)
J. Richard Barnettef0859852012-08-20 14:55:50 -0700209 # self.env is a dictionary of environment variable settings
210 # to be exported for commands run on the host.
211 # LIBC_FATAL_STDERR_ can be useful for diagnosing certain
212 # errors that might happen.
213 self.env['LIBC_FATAL_STDERR_'] = '1'
beeps32a63082013-08-22 14:02:29 -0700214 self._rpc_proxy_map = {}
Fang Dengd1c2b732013-08-20 12:59:46 -0700215 self._ssh_verbosity_flag = ssh_verbosity_flag
Aviv Keshetc5947fa2013-09-04 14:06:29 -0700216 self._ssh_options = ssh_options
Fang Deng5d518f42013-08-02 14:04:32 -0700217 self.servo = None
218 # TODO(fdeng): We need to simplify the
219 # process of servo and servo_host initialization.
220 # crbug.com/298432
221 self._servo_host = self._create_servo_host(servo_args)
222 # TODO(fdeng): 'servo_args is not None' is used to determine whether
223 # a test needs a servo. Better solution is needed.
224 # There are three possible cases here:
225 # 1. servo_arg is None
226 # 2. servo arg is an empty dictionary
227 # 3. servo_arg is a dictionary that has entries of 'servo_host',
228 # 'servo_port'(optional).
229 # We assume that:
230 # a. A test that requires a servo always calls get_servo_arguments
231 # and passes in its return value as |servo_args|.
232 # b. get_servo_arguments never returns None.
233 # Based on the assumptions, we reason that only in case 2 and 3
234 # a servo is required, i.e. when the servo_args is not None.
235 if servo_args is not None:
236 self.servo = self._servo_host.create_healthy_servo_object()
237
238
239 def _create_servo_host(self, servo_args):
240 """Create a ServoHost object.
241
242 There three possible cases:
243 1) If the DUT is in Cros Lab and has a beaglebone and a servo, then
244 create a ServoHost object pointing to the beaglebone. servo_args
245 is ignored.
246 2) If not case 1) and servo_args is neither None nor empty, then
247 create a ServoHost object using servo_args.
248 3) If neither case 1) or 2) applies, return None.
249
250 @param servo_args: A dictionary that contains args for creating
251 a ServoHost object,
252 e.g. {'servo_host': '172.11.11.111',
253 'servo_port': 9999}.
254 See comments above.
255
256 @returns: A ServoHost object or None. See comments above.
257
258 """
259 servo_host_name = servo_host.make_servo_hostname(self.hostname)
260 if utils.host_is_in_lab_zone(servo_host_name):
261 return servo_host.ServoHost(servo_host=servo_host_name)
262 elif servo_args is not None:
263 return servo_host.ServoHost(**servo_args)
264 else:
265 return None
J. Richard Barnette45e93de2012-04-11 17:24:15 -0700266
267
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500268 def get_repair_image_name(self):
269 """Generate a image_name from variables in the global config.
270
271 @returns a str of $board-version/$BUILD.
272
273 """
274 stable_version = global_config.global_config.get_config_value(
275 'CROS', 'stable_cros_version')
276 build_pattern = global_config.global_config.get_config_value(
277 'CROS', 'stable_build_pattern')
278 board = self._get_board_from_afe()
279 if board is None:
280 raise error.AutoservError('DUT has no board attribute, '
281 'cannot be repaired.')
282 return build_pattern % (board, stable_version)
283
284
Scott Zawalski62bacae2013-03-05 10:40:32 -0500285 def _host_in_AFE(self):
286 """Check if the host is an object the AFE knows.
287
288 @returns the host object.
289 """
290 return self._AFE.get_hosts(hostname=self.hostname)
291
292
Chris Sosab76e0ee2013-05-22 16:55:41 -0700293 def lookup_job_repo_url(self):
294 """Looks up the job_repo_url for the host.
295
296 @returns job_repo_url from AFE or None if not found.
297
298 @raises KeyError if the host does not have a job_repo_url
299 """
300 if not self._host_in_AFE():
301 return None
302
303 hosts = self._AFE.get_hosts(hostname=self.hostname)
beepsb5efc532013-06-04 11:29:34 -0700304 if hosts and ds_constants.JOB_REPO_URL in hosts[0].attributes:
305 return hosts[0].attributes[ds_constants.JOB_REPO_URL]
Chris Sosab76e0ee2013-05-22 16:55:41 -0700306
307
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500308 def clear_cros_version_labels_and_job_repo_url(self):
309 """Clear cros_version labels and host attribute job_repo_url."""
Scott Zawalski62bacae2013-03-05 10:40:32 -0500310 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400311 return
312
Scott Zawalski62bacae2013-03-05 10:40:32 -0500313 host_list = [self.hostname]
314 labels = self._AFE.get_labels(
315 name__startswith=ds_constants.VERSION_PREFIX,
316 host__hostname=self.hostname)
Dan Shi0f466e82013-02-22 15:44:58 -0800317
Scott Zawalski62bacae2013-03-05 10:40:32 -0500318 for label in labels:
319 label.remove_hosts(hosts=host_list)
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500320
beepscb6f1e22013-06-28 19:14:10 -0700321 self.update_job_repo_url(None, None)
322
323
324 def update_job_repo_url(self, devserver_url, image_name):
325 """
326 Updates the job_repo_url host attribute and asserts it's value.
327
328 @param devserver_url: The devserver to use in the job_repo_url.
329 @param image_name: The name of the image to use in the job_repo_url.
330
331 @raises AutoservError: If we failed to update the job_repo_url.
332 """
333 repo_url = None
334 if devserver_url and image_name:
335 repo_url = tools.get_package_url(devserver_url, image_name)
336 self._AFE.set_host_attribute(ds_constants.JOB_REPO_URL, repo_url,
Scott Zawalski62bacae2013-03-05 10:40:32 -0500337 hostname=self.hostname)
beepscb6f1e22013-06-28 19:14:10 -0700338 if self.lookup_job_repo_url() != repo_url:
339 raise error.AutoservError('Failed to update job_repo_url with %s, '
340 'host %s' % (repo_url, self.hostname))
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500341
342
Dan Shie9309262013-06-19 22:50:21 -0700343 def add_cros_version_labels_and_job_repo_url(self, image_name):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400344 """Add cros_version labels and host attribute job_repo_url.
345
346 @param image_name: The name of the image e.g.
347 lumpy-release/R27-3837.0.0
Dan Shi7458bf62013-06-10 12:50:16 -0700348
Scott Zawalskieadbf702013-03-14 09:23:06 -0400349 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500350 if not self._host_in_AFE():
Scott Zawalskieadbf702013-03-14 09:23:06 -0400351 return
Scott Zawalski62bacae2013-03-05 10:40:32 -0500352
Scott Zawalskieadbf702013-03-14 09:23:06 -0400353 cros_label = '%s%s' % (ds_constants.VERSION_PREFIX, image_name)
Dan Shie9309262013-06-19 22:50:21 -0700354 devserver_url = dev_server.ImageServer.resolve(image_name).url()
Scott Zawalski62bacae2013-03-05 10:40:32 -0500355
356 labels = self._AFE.get_labels(name=cros_label)
357 if labels:
358 label = labels[0]
359 else:
360 label = self._AFE.create_label(name=cros_label)
361
362 label.add_hosts([self.hostname])
beepscb6f1e22013-06-28 19:14:10 -0700363 self.update_job_repo_url(devserver_url, image_name)
364
365
beepsdae65fd2013-07-26 16:24:41 -0700366 def verify_job_repo_url(self, tag=''):
beepscb6f1e22013-06-28 19:14:10 -0700367 """
368 Make sure job_repo_url of this host is valid.
369
joychen03eaad92013-06-26 09:55:21 -0700370 Eg: The job_repo_url "http://lmn.cd.ab.xyx:8080/static/\
beepscb6f1e22013-06-28 19:14:10 -0700371 lumpy-release/R29-4279.0.0/autotest/packages" claims to have the
372 autotest package for lumpy-release/R29-4279.0.0. If this isn't the case,
373 download and extract it. If the devserver embedded in the url is
374 unresponsive, update the job_repo_url of the host after staging it on
375 another devserver.
376
377 @param job_repo_url: A url pointing to the devserver where the autotest
378 package for this build should be staged.
beepsdae65fd2013-07-26 16:24:41 -0700379 @param tag: The tag from the server job, in the format
380 <job_id>-<user>/<hostname>, or <hostless> for a server job.
beepscb6f1e22013-06-28 19:14:10 -0700381
382 @raises DevServerException: If we could not resolve a devserver.
383 @raises AutoservError: If we're unable to save the new job_repo_url as
384 a result of choosing a new devserver because the old one failed to
385 respond to a health check.
beeps0c865032013-07-30 11:37:06 -0700386 @raises urllib2.URLError: If the devserver embedded in job_repo_url
387 doesn't respond within the timeout.
beepscb6f1e22013-06-28 19:14:10 -0700388 """
389 job_repo_url = self.lookup_job_repo_url()
390 if not job_repo_url:
391 logging.warning('No job repo url set on host %s', self.hostname)
392 return
393
394 logging.info('Verifying job repo url %s', job_repo_url)
395 devserver_url, image_name = tools.get_devserver_build_from_package_url(
396 job_repo_url)
397
beeps0c865032013-07-30 11:37:06 -0700398 ds = dev_server.ImageServer(devserver_url)
beepscb6f1e22013-06-28 19:14:10 -0700399
400 logging.info('Staging autotest artifacts for %s on devserver %s',
401 image_name, ds.url())
beeps687243d2013-07-18 15:29:27 -0700402
403 start_time = time.time()
beepscb6f1e22013-06-28 19:14:10 -0700404 ds.stage_artifacts(image_name, ['autotest'])
beeps687243d2013-07-18 15:29:27 -0700405 stage_time = time.time() - start_time
406
407 # Record how much of the verification time comes from a devserver
408 # restage. If we're doing things right we should not see multiple
409 # devservers for a given board/build/branch path.
410 try:
411 board, build_type, branch = site_utils.ParseBuildName(
412 image_name)[:3]
413 except site_utils.ParseBuildNameException as e:
414 pass
415 else:
beeps0c865032013-07-30 11:37:06 -0700416 devserver = devserver_url[
417 devserver_url.find('/')+2:devserver_url.rfind(':')]
beeps687243d2013-07-18 15:29:27 -0700418 stats_key = {
419 'board': board,
420 'build_type': build_type,
421 'branch': branch,
beeps0c865032013-07-30 11:37:06 -0700422 'devserver': devserver.replace('.', '_'),
beeps687243d2013-07-18 15:29:27 -0700423 }
424 stats.Gauge('verify_job_repo_url').send(
425 '%(board)s.%(build_type)s.%(branch)s.%(devserver)s' % stats_key,
426 stage_time)
beepscb6f1e22013-06-28 19:14:10 -0700427
Scott Zawalskieadbf702013-03-14 09:23:06 -0400428
Dan Shi0f466e82013-02-22 15:44:58 -0800429 def _try_stateful_update(self, update_url, force_update, updater):
430 """Try to use stateful update to initialize DUT.
431
432 When DUT is already running the same version that machine_install
433 tries to install, stateful update is a much faster way to clean up
434 the DUT for testing, compared to a full reimage. It is implemeted
435 by calling autoupdater.run_update, but skipping updating root, as
436 updating the kernel is time consuming and not necessary.
437
438 @param update_url: url of the image.
439 @param force_update: Set to True to update the image even if the DUT
440 is running the same version.
441 @param updater: ChromiumOSUpdater instance used to update the DUT.
442 @returns: True if the DUT was updated with stateful update.
443
444 """
445 if not updater.check_version():
446 return False
447 if not force_update:
448 logging.info('Canceling stateful update because the new and '
449 'old versions are the same.')
450 return False
451 # Following folders should be rebuilt after stateful update.
452 # A test file is used to confirm each folder gets rebuilt after
453 # the stateful update.
454 folders_to_check = ['/var', '/home', '/mnt/stateful_partition']
455 test_file = '.test_file_to_be_deleted'
456 for folder in folders_to_check:
457 touch_path = os.path.join(folder, test_file)
458 self.run('touch %s' % touch_path)
459
460 if not updater.run_update(force_update=True, update_root=False):
461 return False
462
463 # Reboot to complete stateful update.
Chris Sosab76e0ee2013-05-22 16:55:41 -0700464 self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
Dan Shi0f466e82013-02-22 15:44:58 -0800465 check_file_cmd = 'test -f %s; echo $?'
466 for folder in folders_to_check:
467 test_file_path = os.path.join(folder, test_file)
468 result = self.run(check_file_cmd % test_file_path,
469 ignore_status=True)
470 if result.exit_status == 1:
471 return False
472 return True
473
474
J. Richard Barnette7275b612013-06-04 18:13:11 -0700475 def _post_update_processing(self, updater, expected_kernel=None):
Dan Shi0f466e82013-02-22 15:44:58 -0800476 """After the DUT is updated, confirm machine_install succeeded.
477
478 @param updater: ChromiumOSUpdater instance used to update the DUT.
J. Richard Barnette7275b612013-06-04 18:13:11 -0700479 @param expected_kernel: kernel expected to be active after reboot,
480 or `None` to skip rollback checking.
Dan Shi0f466e82013-02-22 15:44:58 -0800481
482 """
J. Richard Barnette7275b612013-06-04 18:13:11 -0700483 # Touch the lab machine file to leave a marker that
484 # distinguishes this image from other test images.
485 # Afterwards, we must re-run the autoreboot script because
486 # it depends on the _LAB_MACHINE_FILE.
Dan Shi0f466e82013-02-22 15:44:58 -0800487 self.run('touch %s' % self._LAB_MACHINE_FILE)
Dan Shi0f466e82013-02-22 15:44:58 -0800488 self.run('start autoreboot')
489
J. Richard Barnette7275b612013-06-04 18:13:11 -0700490 # Figure out the newly active kernel.
491 active_kernel, _ = updater.get_kernel_state()
492
493 # Check for rollback due to a bad build.
494 if expected_kernel and active_kernel != expected_kernel:
495 # Print out some information to make it easier to debug
496 # the rollback.
Dan Shi0f466e82013-02-22 15:44:58 -0800497 logging.debug('Dumping partition table.')
Dan Shi346725f2013-03-20 15:22:38 -0700498 self.run('cgpt show $(rootdev -s -d)')
Dan Shi0f466e82013-02-22 15:44:58 -0800499 logging.debug('Dumping crossystem for firmware debugging.')
Dan Shi346725f2013-03-20 15:22:38 -0700500 self.run('crossystem --all')
Dan Shi0f466e82013-02-22 15:44:58 -0800501 raise autoupdater.ChromiumOSError(
J. Richard Barnette7275b612013-06-04 18:13:11 -0700502 'Build %s failed to boot on %s; system rolled back '
503 'to previous build' % (updater.update_version,
504 self.hostname))
Dan Shi0f466e82013-02-22 15:44:58 -0800505
J. Richard Barnette7275b612013-06-04 18:13:11 -0700506 # Check that we've got the build we meant to install.
507 if not updater.check_version_to_confirm_install():
508 raise autoupdater.ChromiumOSError(
509 'Failed to update %s to build %s; found build '
510 '%s instead' % (self.hostname,
511 updater.update_version,
512 updater.get_build_id()))
Scott Zawalski62bacae2013-03-05 10:40:32 -0500513
J. Richard Barnette7275b612013-06-04 18:13:11 -0700514 # Make sure chromeos-setgoodkernel runs.
515 try:
Dan Shi0f466e82013-02-22 15:44:58 -0800516 utils.poll_for_condition(
J. Richard Barnette7275b612013-06-04 18:13:11 -0700517 lambda: (updater.get_kernel_tries(active_kernel) == 0
518 and updater.get_kernel_success(active_kernel)),
519 exception=autoupdater.ChromiumOSError(),
Dan Shi0f466e82013-02-22 15:44:58 -0800520 timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
J. Richard Barnette7275b612013-06-04 18:13:11 -0700521 except autoupdater.ChromiumOSError as e:
522 services_status = self.run('status system-services').stdout
523 if services_status != 'system-services start/running\n':
524 event = ('Chrome failed to reach login screen')
525 else:
526 event = ('update-engine failed to call '
527 'chromeos-setgoodkernel')
528 raise autoupdater.ChromiumOSError(
529 'After update and reboot, %s '
530 'within %d seconds' % (event,
531 self._KERNEL_UPDATE_TIMEOUT))
Dan Shi0f466e82013-02-22 15:44:58 -0800532
533
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700534 def _stage_image_for_update(self, image_name=None):
Scott Zawalskieadbf702013-03-14 09:23:06 -0400535 """Stage a build on a devserver and return the update_url.
536
537 @param image_name: a name like lumpy-release/R27-3837.0.0
538 @returns an update URL like:
539 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
540 """
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700541 if not image_name:
542 image_name = self.get_repair_image_name()
543 logging.info('Staging build for AU: %s', image_name)
Scott Zawalskieadbf702013-03-14 09:23:06 -0400544 devserver = dev_server.ImageServer.resolve(image_name)
545 devserver.trigger_download(image_name, synchronous=False)
546 return tools.image_url_pattern() % (devserver.url(), image_name)
547
548
J. Richard Barnettee4af8b92013-05-01 13:16:12 -0700549 def stage_image_for_servo(self, image_name=None):
550 """Stage a build on a devserver and return the update_url.
551
552 @param image_name: a name like lumpy-release/R27-3837.0.0
553 @returns an update URL like:
554 http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
555 """
556 if not image_name:
557 image_name = self.get_repair_image_name()
558 logging.info('Staging build for servo install: %s', image_name)
559 devserver = dev_server.ImageServer.resolve(image_name)
560 devserver.stage_artifacts(image_name, ['test_image'])
561 return devserver.get_test_image_url(image_name)
562
563
beepse539be02013-07-31 21:57:39 -0700564 def stage_factory_image_for_servo(self, image_name):
565 """Stage a build on a devserver and return the update_url.
566
567 @param image_name: a name like <baord>/4262.204.0
beeps12c0a3c2013-09-03 11:58:27 -0700568
beepse539be02013-07-31 21:57:39 -0700569 @return: An update URL, eg:
570 http://<devserver>/static/canary-channel/\
571 <board>/4262.204.0/factory_test/chromiumos_factory_image.bin
beeps12c0a3c2013-09-03 11:58:27 -0700572
573 @raises: ValueError if the factory artifact name is missing from
574 the config.
575
beepse539be02013-07-31 21:57:39 -0700576 """
577 if not image_name:
578 logging.error('Need an image_name to stage a factory image.')
579 return
580
beeps12c0a3c2013-09-03 11:58:27 -0700581 factory_artifact = global_config.global_config.get_config_value(
582 'CROS', 'factory_artifact', type=str, default='')
583 if not factory_artifact:
584 raise ValueError('Cannot retrieve the factory artifact name from '
585 'autotest config, and hence cannot stage factory '
586 'artifacts.')
587
beepse539be02013-07-31 21:57:39 -0700588 logging.info('Staging build for servo install: %s', image_name)
589 devserver = dev_server.ImageServer.resolve(image_name)
590 devserver.stage_artifacts(
591 image_name,
beeps12c0a3c2013-09-03 11:58:27 -0700592 [factory_artifact],
593 archive_url=None)
beepse539be02013-07-31 21:57:39 -0700594
595 return tools.factory_image_url_pattern() % (devserver.url(), image_name)
596
597
def machine_install(self, update_url=None, force_update=False,
                    local_devserver=False, repair=False):
    """Install the DUT.

    Use stateful update if the DUT is already running the same build.
    Stateful update does not update kernel and tends to run much faster
    than a full reimage. If the DUT is running a different build, or it
    failed to do a stateful update, full update, including kernel update,
    will be applied to the DUT.

    Once a host enters machine_install its cros_version label will be
    removed as well as its host attribute job_repo_url (used for
    package install).

    The update URL is resolved in this order:
        1. The `update_url` argument, if given.
        2. The image from the autoserv parser options; used directly
           when it starts with 'http://', otherwise staged first.
        3. The image staged by `_stage_image_for_update()` with no
           argument, but only when `repair` is True.

    @param update_url: The url to use for the update
            pattern: http://$devserver:###/update/$build
            If update_url is None and repair is True we will install the
            stable image listed in global_config under
            CROS.stable_cros_version.
    @param force_update: Force an update even if the version installed
            is the same. Default:False
    @param local_devserver: Used by run_remote_test to allow people to
            use their local devserver. Default: False
    @param repair: Whether or not we are in repair mode. This adds special
            cases for repairing a machine like starting update_engine.
            Setting repair to True sets force_update to True as well.
            default: False
    @raises autoupdater.ChromiumOSError if no update URL can be
            determined, or if after a full update the inactive kernel
            has a lower priority than the active kernel.

    """
    if update_url:
        logging.debug('update url is set to %s', update_url)
    else:
        logging.debug('update url is not set, resolving...')
        if self._parser.options.image:
            requested_build = self._parser.options.image
            if requested_build.startswith('http://'):
                update_url = requested_build
                logging.debug('update url is retrieved from requested_build'
                              ': %s', update_url)
            else:
                # Try to stage any build that does not start with
                # http:// on the devservers defined in
                # global_config.ini.
                update_url = self._stage_image_for_update(requested_build)
                logging.debug('Build staged, and update_url is set to: %s',
                              update_url)
        elif repair:
            # No build requested: stage the default image for repair.
            update_url = self._stage_image_for_update()
            logging.debug('Build staged, and update_url is set to: %s',
                          update_url)
        else:
            raise autoupdater.ChromiumOSError(
                    'Update failed. No update URL provided.')

    if repair:
        # In case the system is in a bad state, we always reboot the machine
        # before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)
        self.run('stop update-engine; start update-engine')
        # Repair mode always forces the update.
        force_update = True

    updater = autoupdater.ChromiumOSUpdater(update_url, host=self,
                                            local_devserver=local_devserver)
    updated = False
    # Remove cros-version and job_repo_url host attribute from host.
    self.clear_cros_version_labels_and_job_repo_url()
    # If the DUT is already running the same build, try stateful update
    # first. Stateful update does not update kernel and tends to run much
    # faster than a full reimage.
    try:
        updated = self._try_stateful_update(
                update_url, force_update, updater)
        if updated:
            logging.info('DUT is updated with stateful update.')
    except Exception as e:
        # Best effort: any failure here is logged and we fall through
        # to the full update below (updated is still False).
        logging.exception(e)
        logging.warn('Failed to stateful update DUT, force to update.')

    # Stays None unless a full update runs; it is passed to
    # _post_update_processing() below.
    inactive_kernel = None
    # Do a full update if stateful update is not applicable or failed.
    if not updated:
        # In case the system is in a bad state, we always reboot the
        # machine before machine_install.
        self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

        # TODO(sosa): Remove temporary hack to get rid of bricked machines
        # that can't update due to a corrupted policy.
        self.run('rm -rf /var/lib/whitelist')
        self.run('touch /var/lib/whitelist')
        self.run('chmod -w /var/lib/whitelist')
        self.run('stop update-engine; start update-engine')

        if updater.run_update(force_update):
            updated = True
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                        'Update failed. The priority of the inactive kernel'
                        ' partition is less than that of the active kernel'
                        ' partition.')

            # Updater has returned successfully; reboot the host.
            self.reboot(timeout=self.REBOOT_TIMEOUT, wait=True)

    if updated:
        self._post_update_processing(updater, inactive_kernel)
        # Restore the labels/attribute cleared above, now for the new
        # image.
        image_name = autoupdater.url_to_image_name(update_url)
        self.add_cros_version_labels_and_job_repo_url(image_name)

    # Clean up any old autotest directories which may be lying around.
    for path in global_config.global_config.get_config_value(
            'AUTOSERV', 'client_autodir_paths', type=list):
        self.run('rm -rf ' + path)
716
717
def show_update_engine_log(self):
    """Dump the update engine log by running `cat` on it via self.run()."""
    log_path = constants.UPDATE_ENGINE_LOG
    logging.debug('Dumping %s', log_path)
    self.run('cat %s' % log_path)
722
723
def _get_board_from_afe(self):
    """Look up this host's board label in the AFE.

    Delegates to server_utils.get_board_from_afe() with this host's
    hostname and AFE handle. The label is expected to have the form
    "board:<board>"; the "<board>" part is returned, or `None` when
    there is not exactly one matching label.

    @returns board from label, or `None`.
    """
    board = server_utils.get_board_from_afe(self.hostname, self._AFE)
    return board
Simran Basi833814b2013-01-29 13:13:43 -0800734
735
def get_build(self):
    """Look up the build currently assigned to this Host in the AFE.

    Delegates to server_utils.get_build_from_afe() with this host's
    hostname and AFE handle, which inspects the host's AFE labels.

    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.
    """
    build = server_utils.get_build_from_afe(self.hostname, self._AFE)
    return build
Richard Barnette82c35912012-11-20 10:09:10 -0800745
746
def _install_repair(self):
    """Attempt to repair this host using update-engine.

    When the host is reachable, reinstall it through
    machine_install(repair=True), i.e. with the stable "repair"
    version of Chrome OS defined in the global_config under
    CROS.stable_cros_version.

    @raises AutoservRepairMethodNA if the DUT is not reachable.
    @raises ChromiumOSError if the install failed for some reason.

    """
    host_reachable = self.is_up()
    if not host_reachable:
        raise error.AutoservRepairMethodNA('DUT unreachable for install.')

    logging.info('Attempting to reimage machine to repair image.')
    try:
        self.machine_install(repair=True)
    except autoupdater.ChromiumOSError as install_error:
        # Record the failure, then let the caller see the original error.
        logging.exception(install_error)
        logging.info('Repair via install failed.')
        raise
Scott Zawalski89c44dd2013-02-26 09:28:02 -0500768
769
def servo_install(self, image_url=None, usb_boot_timeout=USB_BOOT_TIMEOUT,
                  install_timeout=INSTALL_TIMEOUT):
    """Re-install the OS on the DUT through its servo.

    The steps are:
    1) install a test image on a USB storage device attached to the
       Servo board,
    2) boot that image in recovery mode, and then
    3) install the image with chromeos-install and power cycle the DUT.

    The USB boot and the install are each timed with a stats.Timer
    whose key embeds the timeout that was used.

    @param image_url: If specified use as the url to install on the DUT.
            otherwise boot the currently staged image on the USB stick.
    @param usb_boot_timeout: The usb_boot_timeout to use during reimage.
            Factory images need a longer usb_boot_timeout than regular
            cros images.
    @param install_timeout: The timeout to use when installing the chromeos
            image. Factory images need a longer install_timeout.

    @raises AutoservRepairFailure if the DUT fails to boot from USB.
    @raises AutoservError if the DUT fails to reboot the installed image.
    """
    boot_key = 'servo_install.usb_boot_timeout_%s' % usb_boot_timeout
    logging.info('Downloading image to USB, then booting from it. Usb boot '
                 'timeout = %s', usb_boot_timeout)
    boot_timer = stats.Timer(boot_key)
    boot_timer.start()
    self.servo.install_recovery_image(image_url)
    booted_from_usb = self.wait_up(timeout=usb_boot_timeout)
    if not booted_from_usb:
        raise error.AutoservRepairFailure(
                'DUT failed to boot from USB after %d seconds' %
                usb_boot_timeout)
    boot_timer.stop()

    install_key = 'servo_install.install_timeout_%s' % install_timeout
    install_timer = stats.Timer(install_key)
    install_timer.start()
    logging.info('Installing image through chromeos-install.')
    self.run('chromeos-install --yes', timeout=install_timeout)
    install_timer.stop()

    logging.info('Power cycling DUT through servo.')
    self.servo.power_long_press()
    self.servo.switch_usbkey('off')
    # We *must* use power_on() here; on Parrot it's how we get
    # out of recovery mode.
    power_controller = self.servo.get_power_state_controller()
    power_controller.power_on()

    logging.info('Waiting for DUT to come back up.')
    rebooted = self.wait_up(timeout=self.BOOT_TIMEOUT)
    if not rebooted:
        raise error.AutoservError('DUT failed to reboot installed '
                                  'test image after %d seconds' %
                                  self.BOOT_TIMEOUT)
823
824
def _servo_repair_reinstall(self):
    """Reinstall the DUT through servo using a staged test image.

    Stages an image with stage_image_for_servo() and hands its URL to
    servo_install(), which:
    1) installs a test image on a USB storage device attached to the
       Servo board,
    2) boots that image in recovery mode, and then
    3) installs the image with chromeos-install.

    @raises AutoservRepairMethodNA if the device does not have servo
            support.

    """
    if not self.servo:
        raise error.AutoservRepairMethodNA('Repair Reinstall NA: '
                                           'DUT has no servo support.')

    logging.info('Attempting to recovery servo enabled device with '
                 'servo_repair_reinstall')

    staged_url = self.stage_image_for_servo()
    self.servo_install(staged_url)
847
848
849 def _servo_repair_power(self):
850 """Attempt to repair DUT using an attached Servo.
851
852 Attempt to power on the DUT via power_long_press.
853
854 @raises AutoservRepairMethodNA if the device does not have servo
855 support.
856 @raises AutoservRepairFailure if the repair fails for any reason.
857 """
858 if not self.servo:
859 raise error.AutoservRepairMethodNA('Repair Power NA: '
860 'DUT has no servo support.')
861
862 logging.info('Attempting to recover servo enabled device by '
863 'powering it off and on.')
864 self.servo.get_power_state_controller().power_off()
865 self.servo.get_power_state_controller().power_on()
866 if self.wait_up(self.BOOT_TIMEOUT):
867 return
868
869 raise error.AutoservRepairFailure('DUT did not boot after long_press.')
Richard Barnette03a0c132012-11-05 12:40:35 -0800870
871
Richard Barnette82c35912012-11-20 10:09:10 -0800872 def _powercycle_to_repair(self):
873 """Utilize the RPM Infrastructure to bring the host back up.
874
875 If the host is not up/repaired after the first powercycle we utilize
876 auto fallback to the last good install by powercycling and rebooting the
877 host 6 times.
Scott Zawalski62bacae2013-03-05 10:40:32 -0500878
879 @raises AutoservRepairMethodNA if the device does not support remote
880 power.
881 @raises AutoservRepairFailure if the repair fails for any reason.
882
Richard Barnette82c35912012-11-20 10:09:10 -0800883 """
Scott Zawalski62bacae2013-03-05 10:40:32 -0500884 if not self.has_power():
885 raise error.AutoservRepairMethodNA('Device does not support power.')
886
Richard Barnette82c35912012-11-20 10:09:10 -0800887 logging.info('Attempting repair via RPM powercycle.')
888 failed_cycles = 0
889 self.power_cycle()
890 while not self.wait_up(timeout=self.BOOT_TIMEOUT):
891 failed_cycles += 1
892 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
Scott Zawalski62bacae2013-03-05 10:40:32 -0500893 raise error.AutoservRepairFailure(
894 'Powercycled host %s %d times; device did not come back'
895 ' online.' % (self.hostname, failed_cycles))
Richard Barnette82c35912012-11-20 10:09:10 -0800896 self.power_cycle()
897 if failed_cycles == 0:
898 logging.info('Powercycling was successful first time.')
899 else:
900 logging.info('Powercycling was successful after %d failures.',
901 failed_cycles)
902
903
def repair_full(self):
    """Repair a host for repair level NO_PROTECTION.

    This overrides the base class function for repair; it does
    not call back to the parent class, but instead offers a
    simplified implementation based on the capabilities in the
    Chrome OS test lab.

    If a servo host is configured, a healthy servo object is created
    first; on failure the error is logged and `self.servo` is set to
    None so that the servo based repairs report themselves as NA.

    The following procedures are then attempted in order, each
    followed by `self.verify()`, stopping at the first that succeeds:
      1. Try to re-install to a known stable image using
         auto-update.
      2. If there's a servo for the DUT, try to power the DUT off and
         on.
      3. If there's a servo for the DUT, try to re-install via
         the servo.
      4. If the DUT can be power-cycled via RPM, try to repair
         by power-cycling.

    Every outcome increments a stats counter named after the repair
    function, plus a per-board counter when the board is known.

    @raises AutoservRepairTotalFailure if the repair process fails to
            fix the DUT.

    """
    if self._servo_host:
        try:
            self.servo = self._servo_host.create_healthy_servo_object()
        except Exception as servo_error:
            self.servo = None
            logging.error('Could not create a healthy servo: %s',
                          servo_error)

    # TODO(scottz): This should use something similar to label_decorator,
    # but needs to be populated in order so DUTs are repaired with the
    # least amount of effort.
    repair_funcs = [
        self._install_repair,
        self._servo_repair_power,
        self._servo_repair_reinstall,
        self._powercycle_to_repair,
    ]
    failure_messages = []
    board = self._get_board_from_afe()

    def _bump(counter_name):
        """Increment counter_name and, if known, its per-board twin."""
        stats.Counter(counter_name).increment()
        if board:
            stats.Counter('%s.%s' % (counter_name, board)).increment()

    for repair_func in repair_funcs:
        func_name = repair_func.__name__
        try:
            repair_func()
            self.verify()
            _bump('%s.SUCCEEDED' % func_name)
            return
        except error.AutoservRepairMethodNA as na_error:
            _bump('%s.RepairNA' % func_name)
            logging.warn('Repair function NA: %s', na_error)
            failure_messages.append(str(na_error))
        except Exception as repair_error:
            _bump('%s.FAILED' % func_name)
            logging.warn('Failed to repair device: %s', repair_error)
            failure_messages.append(str(repair_error))

    _bump('Full_Repair_Failed')
    raise error.AutoservRepairTotalFailure(
            'All attempts at repairing the device failed:\n%s' %
            '\n'.join(failure_messages))
Richard Barnette82c35912012-11-20 10:09:10 -0800990
991
def close(self):
    """Close this host.

    Calls rpc_disconnect_all() first, then delegates to the parent
    class close().
    """
    self.rpc_disconnect_all()
    super(CrosHost, self).close()
J. Richard Barnette1d78b012012-05-15 13:56:30 -0700995
996
def _cleanup_poweron(self):
    """Special cleanup method to make sure hosts always get power back.

    Does nothing unless the AFE lists this host with the
    _RPM_OUTLET_CHANGED attribute. Otherwise power is turned on (an
    RPM failure is logged, not raised) and the attribute is reset to
    None in the AFE.
    """
    afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
    matching_hosts = afe.get_hosts(hostname=self.hostname)
    if not matching_hosts:
        return
    if self._RPM_OUTLET_CHANGED not in matching_hosts[0].attributes:
        return

    logging.debug('This host has recently interacted with the RPM'
                  ' Infrastructure. Ensuring power is on.')
    try:
        self.power_on()
    except rpm_client.RemotePowerException:
        # If cleanup has completed but there was an issue with the RPM
        # Infrastructure, log an error message rather than fail cleanup
        logging.error('Failed to turn Power On for this host after '
                      'cleanup through the RPM Infrastructure.')
    afe.set_host_attribute(self._RPM_OUTLET_CHANGED, None,
                           hostname=self.hostname)
1015
1016
beepsc87ff602013-07-31 21:53:00 -07001017 def _is_factory_image(self):
1018 """Checks if the image on the DUT is a factory image.
1019
1020 @return: True if the image on the DUT is a factory image.
1021 False otherwise.
1022 """
1023 result = self.run('[ -f /root/.factory_test ]', ignore_status=True)
1024 return result.exit_status == 0
1025
1026
def _restart_ui(self):
    """Restart the ui job on the DUT.

    The cros_ui helpers are run on the client before and after the
    restart: one to clear the login prompt state, one to wait for the
    login prompt.

    @raises: FactoryImageCheckerException for factory images, since
             we cannot attempt to restart ui on them.
             error.AutoservRunError for any other type of error that
             occurs while restarting ui.
    """
    if self._is_factory_image():
        raise FactoryImageCheckerException('Cannot restart ui on factory '
                                           'images')

    cros_ui_module = 'autotest_lib.client.cros.cros_ui'
    client_at = autotest.Autotest(self)
    client_at.run_static_method(cros_ui_module,
                                '_clear_login_prompt_state')
    self.run('restart ui')
    client_at.run_static_method(cros_ui_module,
                                '_wait_for_login_prompt')
1045
1046
def cleanup(self):
    """Clean up the DUT, preferring a ui restart over a reboot.

    Removes the cleanup-logs-paused marker file, then tries to restart
    ui. If that fails with one of the expected errors, the parent
    class cleanup() is run instead. Finally, hosts with power support
    get their power state checked through _cleanup_poweron().
    """
    paused_marker = constants.CLEANUP_LOGS_PAUSED_FILE
    self.run('rm -f %s' % paused_marker)
    try:
        self._restart_ui()
    except (error.AutotestRunError, error.AutoservRunError,
            FactoryImageCheckerException):
        logging.warn('Unable to restart ui, rebooting device.')
        # Since restarting the UI fails fall back to normal Autotest
        # cleanup routines, i.e. reboot the machine.
        super(CrosHost, self).cleanup()

    # Check if the rpm outlet was manipulated.
    if self.has_power():
        self._cleanup_poweron()
J. Richard Barnette45e93de2012-04-11 17:24:15 -07001060
1061
def reboot(self, **dargs):
    """Reboot the site host with Chrome OS specific defaults.

    The more generic RemoteHost.reboot() performs sync and sleeps for
    5 seconds. This is not necessary for Chrome OS devices as the
    sync should be finished in a short time during the reboot
    command.

    Defaults are supplied for 'reboot_cmd' and 'fastsync' only when
    the caller did not pass them; everything is then forwarded to the
    parent class reboot().

    @param dargs: Keyword arguments for the parent class reboot().
    """
    dargs.setdefault('reboot_cmd',
                     '((reboot & sleep 10; reboot -f &)'
                     ' </dev/null >/dev/null 2>&1 &)')
    # Enable fastsync to avoid running extra sync commands before reboot.
    dargs.setdefault('fastsync', True)
    super(CrosHost, self).reboot(**dargs)
Yu-Ju Honga2be94a2012-07-31 09:48:52 -07001077
1078
def verify_software(self):
    """Verify working software on a Chrome OS system.

    Tests for the following conditions:
     1. All conditions tested by the parent version of this
        function.
     2. Sufficient space in /mnt/stateful_partition.
     3. Sufficient space in /mnt/stateful_partition/encrypted.
     4. update_engine answers a simple status request (skipped on
        factory images).
     5. python runs on the DUT and can import cPickle.

    The space thresholds are read from the SERVER section of the
    global config, defaulting to 20.0 and 0.1 GB respectively.

    """
    super(CrosHost, self).verify_software()

    config = global_config.global_config
    stateful_gb = config.get_config_value(
            'SERVER', 'gb_diskspace_required', type=float,
            default=20.0)
    self.check_diskspace('/mnt/stateful_partition', stateful_gb)

    encrypted_gb = config.get_config_value(
            'SERVER', 'gb_encrypted_diskspace_required', type=float,
            default=0.1)
    self.check_diskspace('/mnt/stateful_partition/encrypted', encrypted_gb)

    # Factory images don't run update engine,
    # goofy controls dbus on these DUTs.
    if not self._is_factory_image():
        self.run('update_engine_client --status')
    # Makes sure python is present, loads and can use built in functions.
    # We have seen cases where importing cPickle fails with undefined
    # symbols in cPickle.so.
    self.run('python -c "import cPickle"')
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001110
1111
def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Build the ssh command line, with options tuned for Chrome OS.

    Overrides the default make_ssh_command. Fixed options:
      - StrictHostKeyChecking=no and BatchMode=yes.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.
        Host keys change with every new installation, don't waste
        memory/space saving them.

      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
        180 seconds. In conjunction with ServerAliveCountMax ensures
        that if the connection dies, Autotest will bail out quickly.
        Originally tried 60 secs, but saw frequent job ABORTS where
        the test completed successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user User name to use for the ssh connection.
    @param port Port on the target host to use for ssh connection.
    @param opts Additional options to the ssh command.
    @param hosts_file Ignored.
    @param connect_timeout Ignored.
    @param alive_interval Ignored.

    @return The ssh command line as a string, without the hostname.
    """
    tuned_settings = (
        'StrictHostKeyChecking=no',
        'UserKnownHostsFile=/dev/null',
        'BatchMode=yes',
        'ConnectTimeout=30',
        'ServerAliveInterval=180',
        'ServerAliveCountMax=3',
        'ConnectionAttempts=4',
        'Protocol=2',
    )
    tuned_flags = ' '.join(['-o %s' % setting for setting in tuned_settings])
    return '/usr/bin/ssh -a -x %s %s %s %s -l %s -p %d' % (
            self._ssh_verbosity_flag, self._ssh_options, opts,
            tuned_flags, user, port)
Fang Deng96667ca2013-08-01 17:46:18 -07001152
1153
def _create_ssh_tunnel(self, port, local_port):
    """Start an ssh process forwarding local_port to port on the host.

    @param port: remote port on the host.
    @param local_port: local forwarding port.

    @return: the tunnel process (a subprocess.Popen object).
    """
    # Chrome OS on the target closes down most external ports
    # for security.  We could open the port, but doing that
    # would conflict with security tests that check that only
    # expected ports are open.  So, to get to the port on the
    # target we use an ssh tunnel.
    forwarding_opts = '-n -N -q -L %d:localhost:%d' % (local_port, port)
    tunnel_cmd = '%s %s' % (self.make_ssh_command(opts=forwarding_opts),
                            self.hostname)
    logging.debug('Full tunnel command: %s', tunnel_cmd)
    tunnel = subprocess.Popen(tunnel_cmd, shell=True, close_fds=True)
    logging.debug('Started ssh tunnel, local = %d'
                  ' remote = %d, pid = %d',
                  local_port, port, tunnel.pid)
    return tunnel
1176
1177
def _setup_rpc(self, port, command_name, remote_pid=None):
    """Sets up a tunnel process and performs rpc connection book keeping.

    Any existing connection for `port` is disconnected first, then a
    new ssh tunnel is started from an unused local port and recorded
    in the rpc proxy map under `port`.

    This method assumes that xmlrpc and jsonrpc never conflict, since
    we can only either have an xmlrpc or a jsonrpc server listening on
    a remote port. As such, it enforces a single proxy->remote port
    policy, i.e if one starts a jsonrpc proxy/server from port A->B,
    and then tries to start an xmlrpc proxy forwarded to the same port,
    the xmlrpc proxy will override the jsonrpc tunnel process, however:

    1. None of the methods on the xmlrpc proxy will work because
    the server listening on B is jsonrpc.

    2. The xmlrpc client cannot initiate a termination of the JsonRPC
    server, as the only use case currently is goofy, which is tied to
    the factory image. It is much easier to handle a failed xmlrpc
    call on the client than it is to terminate goofy in this scenario,
    as doing the latter might leave the DUT in a hard to recover state.

    With the current implementation newer rpc proxy connections will
    terminate the tunnel processes of older rpc connections tunneling
    to the same remote port. If methods are invoked on the client
    after this has happened they will fail with connection closed errors.

    @param port: The remote forwarding port.
    @param command_name: The name of the remote process, to terminate
                         using pkill.
    @param remote_pid: Value stored alongside the tunnel in the rpc
                       proxy map; xmlrpc_connect passes the pid of the
                       server it started. Default: None

    @return A url that we can use to initiate the rpc connection.
    """
    self.rpc_disconnect(port)
    forwarding_port = utils.get_unused_port()
    tunnel = self._create_ssh_tunnel(port, forwarding_port)
    proxy_record = (command_name, tunnel, remote_pid)
    self._rpc_proxy_map[port] = proxy_record
    return self._RPC_PROXY_URL % forwarding_port
1213
1214
Christopher Wileyd78249a2013-03-01 13:05:31 -08001215 def xmlrpc_connect(self, command, port, command_name=None,
1216 ready_test_name=None, timeout_seconds=10):
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001217 """Connect to an XMLRPC server on the host.
1218
1219 The `command` argument should be a simple shell command that
1220 starts an XMLRPC server on the given `port`. The command
1221 must not daemonize, and must terminate cleanly on SIGTERM.
1222 The command is started in the background on the host, and a
1223 local XMLRPC client for the server is created and returned
1224 to the caller.
1225
1226 Note that the process of creating an XMLRPC client makes no
1227 attempt to connect to the remote server; the caller is
1228 responsible for determining whether the server is running
1229 correctly, and is ready to serve requests.
1230
Christopher Wileyd78249a2013-03-01 13:05:31 -08001231 Optionally, the caller can pass ready_test_name, a string
1232 containing the name of a method to call on the proxy. This
1233 method should take no parameters and return successfully only
1234 when the server is ready to process client requests. When
1235 ready_test_name is set, xmlrpc_connect will block until the
1236 proxy is ready, and throw a TestError if the server isn't
1237 ready by timeout_seconds.
1238
beeps32a63082013-08-22 14:02:29 -07001239 If a server is already running on the remote port, this
1240 method will kill it and disconnect the tunnel process
1241 associated with the connection before establishing a new one,
1242 by consulting the rpc_proxy_map in rpc_disconnect.
1243
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001244 @param command Shell command to start the server.
1245 @param port Port number on which the server is expected to
1246 be serving.
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001247 @param command_name String to use as input to `pkill` to
1248 terminate the XMLRPC server on the host.
Christopher Wileyd78249a2013-03-01 13:05:31 -08001249 @param ready_test_name String containing the name of a
1250 method defined on the XMLRPC server.
1251 @param timeout_seconds Number of seconds to wait
1252 for the server to become 'ready.' Will throw a
1253 TestFail error if server is not ready in time.
1254
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001255 """
Christopher Wileyc14f06a2013-10-16 13:55:39 -07001256 # Clean up any existing state. If the caller is willing
1257 # to believe their server is down, we ought to clean up
1258 # any tunnels we might have sitting around.
1259 self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001260 # Start the server on the host. Redirection in the command
1261 # below is necessary, because 'ssh' won't terminate until
1262 # background child processes close stdin, stdout, and
1263 # stderr.
Christopher Wileydd181852013-10-10 19:56:58 -07001264 remote_cmd = '%s </dev/null >/dev/null 2>&1 & echo $!' % command
1265 remote_pid = self.run(remote_cmd).stdout.rstrip('\n')
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001266 logging.debug('Started XMLRPC server on host %s, pid = %s',
1267 self.hostname, remote_pid)
1268
Christopher Wileydd181852013-10-10 19:56:58 -07001269 # Tunnel through SSH to be able to reach that remote port.
1270 rpc_url = self._setup_rpc(port, command_name, remote_pid=remote_pid)
Christopher Wileyd78249a2013-03-01 13:05:31 -08001271 proxy = xmlrpclib.ServerProxy(rpc_url, allow_none=True)
Christopher Wileydd181852013-10-10 19:56:58 -07001272
Christopher Wileyd78249a2013-03-01 13:05:31 -08001273 if ready_test_name is not None:
J. Richard Barnette13eb7c02013-03-07 12:06:29 -08001274 # retry.retry logs each attempt; calculate delay_sec to
1275 # keep log spam to a dull roar.
Christopher Wiley0ed712b2013-04-09 15:25:12 -07001276 @retry.retry((socket.error,
1277 xmlrpclib.ProtocolError,
1278 httplib.BadStatusLine),
Christopher Wileyd78249a2013-03-01 13:05:31 -08001279 timeout_min=timeout_seconds/60.0,
J. Richard Barnette13eb7c02013-03-07 12:06:29 -08001280 delay_sec=min(max(timeout_seconds/20.0, 0.1), 1))
Christopher Wileyd78249a2013-03-01 13:05:31 -08001281 def ready_test():
1282 """ Call proxy.ready_test_name(). """
1283 getattr(proxy, ready_test_name)()
1284 successful = False
1285 try:
1286 logging.info('Waiting %d seconds for XMLRPC server '
1287 'to start.', timeout_seconds)
1288 ready_test()
1289 successful = True
Christopher Wileyd78249a2013-03-01 13:05:31 -08001290 finally:
1291 if not successful:
1292 logging.error('Failed to start XMLRPC server.')
beeps32a63082013-08-22 14:02:29 -07001293 self.rpc_disconnect(port)
Christopher Wileyd78249a2013-03-01 13:05:31 -08001294 logging.info('XMLRPC server started successfully.')
1295 return proxy
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001296
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001297
def jsonrpc_connect(self, port):
    """Build a jsonrpc client proxy that talks through an ssh tunnel.

    This exists to talk to goofy, the default system manager on
    factory images.  Unlike xmlrpc_connect, no remote server is
    started here: goofy is built in to the image and starts when the
    target boots, so checking that the server is actually healthy is
    left to the caller.

    Several proxies could in principle be forwarded to the same
    remote port, but that risks leaking an existing tunnel process.
    NOTE(review): tearing down an older tunnel for `port` is
    presumably done inside `_setup_rpc` (via rpc_disconnect); this
    method itself does not call rpc_disconnect -- confirm.

    @param port: port on the remote host that is serving this proxy.

    @return: The client proxy, or None if jsonrpclib is unavailable.
    """
    if jsonrpclib:
        # Resolve the proxy class before the tunnel is set up.
        make_proxy = jsonrpclib.jsonrpc.ServerProxy
        client = make_proxy(self._setup_rpc(port, None))
        logging.info('Established a jsonrpc connection through port %s.',
                     port)
        return client

    logging.warning('Jsonrpclib could not be imported. Check that '
                    'site-packages contains jsonrpclib.')
    return None
1328
1329
def rpc_disconnect(self, port):
    """Disconnect from an RPC server on the host.

    Stops the remote RPC server recorded for `port` (when a process
    name was recorded), terminates the local ssh tunnel if it is
    still running, and forgets the port.  Client objects handed out
    earlier are not touched directly, but their calls will fail once
    the tunnel is gone.

    Nothing happens if `port` has no entry in the RPC proxy map.

    @param port Port number passed to a previous call to
                `_setup_rpc()`.
    @exception TestError The remote server's pid was still listed by
                         `pgrep` when the shutdown timeout expired.
    """
    if port not in self._rpc_proxy_map:
        return

    server_name, tunnel, server_pid = self._rpc_proxy_map[port]
    if server_name:
        # Target the server by name with 'pkill' rather than by PID:
        # the host may have rebooted since we connected, and an
        # unrelated process could now own the old PID.
        #
        # 'pkill' exits with status 1 when nothing matched, which
        # would make run() raise; that outcome is fine here, so the
        # exit status is ignored.
        self.run("pkill -f '%s'" % server_name, ignore_status=True)
        if server_pid:
            logging.info('Waiting for RPC server "%s" shutdown',
                         server_name)
            began = time.time()
            while True:
                if (time.time() - began >=
                        self._RPC_SHUTDOWN_TIMEOUT_SECONDS):
                    raise error.TestError(
                            'Failed to shutdown RPC server %s' %
                            server_name)
                live_pids = self.run(
                        "pgrep -f '%s'" % server_name,
                        ignore_status=True).stdout.split()
                if server_pid not in live_pids:
                    logging.info('Shut down RPC server.')
                    break
                time.sleep(self._RPC_SHUTDOWN_POLLING_PERIOD_SECONDS)

    tunnel_alive = tunnel.poll() is None
    if tunnel_alive:
        tunnel.terminate()
        logging.debug('Terminated tunnel, pid %d', tunnel.pid)
    else:
        logging.debug('Tunnel pid %d terminated early, status %d',
                      tunnel.pid, tunnel.returncode)
    del self._rpc_proxy_map[port]
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001384
1385
def rpc_disconnect_all(self):
    """Disconnect all known RPC proxy ports.

    Calls `rpc_disconnect()` once for every port currently present
    in the RPC proxy map.
    """
    # rpc_disconnect() deletes the port's entry from the map, so walk
    # an explicit snapshot of the keys instead of the live mapping;
    # iterating a dict view while it shrinks raises RuntimeError.
    for port in list(self._rpc_proxy_map):
        self.rpc_disconnect(port)
J. Richard Barnette1d78b012012-05-15 13:56:30 -07001390
1391
def _ping_check_status(self, status):
    """Ping the host a single time and compare against `status`.

    @param status The ping status the caller is looking for.
    @return True iff `status` agrees with the ping outcome, i.e.
            both are True or both are False.

    """
    ping_exit = utils.ping(self.hostname, tries=1, deadline=1)
    # A zero result from utils.ping is treated as "host answered".
    answered = (ping_exit == 0)
    differs = status ^ answered
    return not differs
1402
1403 def _ping_wait_for_status(self, status, timeout):
1404 """Wait for the host to have a given status (UP or DOWN).
1405
1406 Status is checked by polling. Polling will not last longer
1407 than the number of seconds in `timeout`. The polling
1408 interval will be long enough that only approximately
1409 _PING_WAIT_COUNT polling cycles will be executed, subject
1410 to a maximum interval of about one minute.
1411
1412 @param status Waiting will stop immediately if `ping` of the
1413 host returns this status.
1414 @param timeout Poll for at most this many seconds.
1415 @return True iff the host status from `ping` matched the
1416 requested status at the time of return.
1417
1418 """
1419 # _ping_check_status() takes about 1 second, hence the
1420 # "- 1" in the formula below.
1421 poll_interval = min(int(timeout / self._PING_WAIT_COUNT), 60) - 1
1422 end_time = time.time() + timeout
1423 while time.time() <= end_time:
1424 if self._ping_check_status(status):
1425 return True
1426 if poll_interval > 0:
1427 time.sleep(poll_interval)
1428
1429 # The last thing we did was sleep(poll_interval), so it may
1430 # have been too long since the last `ping`. Check one more
1431 # time, just to be sure.
1432 return self._ping_check_status(status)
1433
def ping_wait_up(self, timeout):
    """Wait until the host answers `ping`.

    N.B. This is not a reliable stand-in for `wait_up()`: a host
    that answers ping does not necessarily answer ssh.  Use it only
    when the target DUT can be considered functional even though it
    can't be reached via ssh.

    @param timeout Minimum time to allow before declaring the
                   host to be non-responsive.
    @return True iff the host answered to ping before the timeout.

    """
    wanted = self._PING_STATUS_UP
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001449
def ping_wait_down(self, timeout):
    """Wait until the host stops answering `ping`.

    This can serve as a slightly faster version of `wait_down()`,
    because it avoids potentially long ssh timeouts.

    @param timeout Minimum time to allow for the host to become
                   non-responsive.
    @return True iff the host quit answering ping before the
            timeout.

    """
    wanted = self._PING_STATUS_DOWN
    return self._ping_wait_for_status(wanted, timeout)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001463
1464 def test_wait_for_sleep(self):
1465 """Wait for the client to enter low-power sleep mode.
1466
1467 The test for "is asleep" can't distinguish a system that is
1468 powered off; to confirm that the unit was asleep, it is
1469 necessary to force resume, and then call
1470 `test_wait_for_resume()`.
1471
1472 This function is expected to be called from a test as part
1473 of a sequence like the following:
1474
1475 ~~~~~~~~
1476 boot_id = host.get_boot_id()
1477 # trigger sleep on the host
1478 host.test_wait_for_sleep()
1479 # trigger resume on the host
1480 host.test_wait_for_resume(boot_id)
1481 ~~~~~~~~
1482
1483 @exception TestFail The host did not go to sleep within
1484 the allowed time.
1485 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001486 if not self.ping_wait_down(timeout=self.SLEEP_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001487 raise error.TestFail(
1488 'client failed to sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001489 self.SLEEP_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001490
1491
1492 def test_wait_for_resume(self, old_boot_id):
1493 """Wait for the client to resume from low-power sleep mode.
1494
1495 The `old_boot_id` parameter should be the value from
1496 `get_boot_id()` obtained prior to entering sleep mode. A
1497 `TestFail` exception is raised if the boot id changes.
1498
1499 See @ref test_wait_for_sleep for more on this function's
1500 usage.
1501
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001502 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001503 target host went to sleep.
1504
1505 @exception TestFail The host did not respond within the
1506 allowed time.
1507 @exception TestFail The host responded, but the boot id test
1508 indicated a reboot rather than a sleep
1509 cycle.
1510 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001511 if not self.wait_up(timeout=self.RESUME_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001512 raise error.TestFail(
1513 'client failed to resume from sleep after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001514 self.RESUME_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001515 else:
1516 new_boot_id = self.get_boot_id()
1517 if new_boot_id != old_boot_id:
1518 raise error.TestFail(
1519 'client rebooted, but sleep was expected'
1520 ' (old boot %s, new boot %s)'
1521 % (old_boot_id, new_boot_id))
1522
1523
1524 def test_wait_for_shutdown(self):
1525 """Wait for the client to shut down.
1526
1527 The test for "has shut down" can't distinguish a system that
1528 is merely asleep; to confirm that the unit was down, it is
1529 necessary to force boot, and then call test_wait_for_boot().
1530
1531 This function is expected to be called from a test as part
1532 of a sequence like the following:
1533
1534 ~~~~~~~~
1535 boot_id = host.get_boot_id()
1536 # trigger shutdown on the host
1537 host.test_wait_for_shutdown()
1538 # trigger boot on the host
1539 host.test_wait_for_boot(boot_id)
1540 ~~~~~~~~
1541
1542 @exception TestFail The host did not shut down within the
1543 allowed time.
1544 """
Andrew Bresticker678c0c72013-01-22 10:44:09 -08001545 if not self.ping_wait_down(timeout=self.SHUTDOWN_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001546 raise error.TestFail(
1547 'client failed to shut down after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001548 self.SHUTDOWN_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001549
1550
1551 def test_wait_for_boot(self, old_boot_id=None):
1552 """Wait for the client to boot from cold power.
1553
1554 The `old_boot_id` parameter should be the value from
1555 `get_boot_id()` obtained prior to shutting down. A
1556 `TestFail` exception is raised if the boot id does not
1557 change. The boot id test is omitted if `old_boot_id` is not
1558 specified.
1559
1560 See @ref test_wait_for_shutdown for more on this function's
1561 usage.
1562
J. Richard Barnette7214e0b2013-02-06 15:20:49 -08001563 @param old_boot_id A boot id value obtained before the
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001564 shut down.
1565
1566 @exception TestFail The host did not respond within the
1567 allowed time.
1568 @exception TestFail The host responded, but the boot id test
1569 indicated that there was no reboot.
1570 """
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001571 if not self.wait_up(timeout=self.REBOOT_TIMEOUT):
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001572 raise error.TestFail(
1573 'client failed to reboot after %d seconds' %
J. Richard Barnetteeb69d722012-06-18 17:29:44 -07001574 self.REBOOT_TIMEOUT)
J. Richard Barnette134ec2c2012-04-25 12:59:37 -07001575 elif old_boot_id:
1576 if self.get_boot_id() == old_boot_id:
1577 raise error.TestFail(
1578 'client is back up, but did not reboot'
1579 ' (boot %s)' % old_boot_id)
Simran Basid5e5e272012-09-24 15:23:59 -07001580
1581
@staticmethod
def check_for_rpm_support(hostname):
    """Report whether a hostname looks like an RPM-powered lab DUT.

    @param hostname: hostname to check for rpm support.

    @return None if the hostname does not follow the naming format
            defined for RPM powered DUTs in the lab.  If it does follow
            the format, a regular expression MatchObject is returned
            instead.
    """
    rpm_pattern = CrosHost._RPM_HOSTNAME_REGEX
    return re.match(rpm_pattern, hostname)
Simran Basid5e5e272012-09-24 15:23:59 -07001593
1594
def has_power(self):
    """Report whether this host is powered by an RPM.

    @return The result of `check_for_rpm_support()` for this host's
            hostname: a truthy regular expression MatchObject when the
            hostname follows the RPM naming format, None otherwise.
    """
    rpm_match = CrosHost.check_for_rpm_support(self.hostname)
    return rpm_match
Simran Basid5e5e272012-09-24 15:23:59 -07001602
1603
Ismail Noorbasha07fdb612013-02-14 14:13:31 -08001604 def _set_power(self, state, power_method):
1605 """Sets the power to the host via RPM, Servo or manual.
1606
1607 @param state Specifies which power state to set to DUT
1608 @param power_method Specifies which method of power control to
1609 use. By default "RPM" will be used. Valid values
1610 are the strings "RPM", "manual", "servoj10".
1611
1612 """
1613 ACCEPTABLE_STATES = ['ON', 'OFF']
1614
1615 if state.upper() not in ACCEPTABLE_STATES:
1616 raise error.TestError('State must be one of: %s.'
1617 % (ACCEPTABLE_STATES,))
1618
1619 if power_method == self.POWER_CONTROL_SERVO:
1620 logging.info('Setting servo port J10 to %s', state)
1621 self.servo.set('prtctl3_pwren', state.lower())
1622 time.sleep(self._USB_POWER_TIMEOUT)
1623 elif power_method == self.POWER_CONTROL_MANUAL:
1624 logging.info('You have %d seconds to set the AC power to %s.',
1625 self._POWER_CYCLE_TIMEOUT, state)
1626 time.sleep(self._POWER_CYCLE_TIMEOUT)
1627 else:
1628 if not self.has_power():
1629 raise error.TestFail('DUT does not have RPM connected.')
Simran Basi5e6339a2013-03-21 11:34:32 -07001630 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
1631 afe.set_host_attribute(self._RPM_OUTLET_CHANGED, True,
1632 hostname=self.hostname)
Simran Basi1df55112013-09-06 11:25:09 -07001633 rpm_client.set_power(self.hostname, state.upper(), timeout_mins=5)
Simran Basid5e5e272012-09-24 15:23:59 -07001634
1635
def power_off(self, power_method=POWER_CONTROL_RPM):
    """Turn off power to this host via RPM, Servo or manual.

    Thin wrapper that asks `_set_power()` for the 'OFF' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='OFF', power_method=power_method)
Simran Basid5e5e272012-09-24 15:23:59 -07001645
1646
def power_on(self, power_method=POWER_CONTROL_RPM):
    """Turn on power to this host via RPM, Servo or manual.

    Thin wrapper that asks `_set_power()` for the 'ON' state.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    self._set_power(state='ON', power_method=power_method)
1656
1657
def power_cycle(self, power_method=POWER_CONTROL_RPM):
    """Cycle power to this host by turning it OFF, then ON.

    For servo and manual control the cycle is done as power_off(),
    a pause of _POWER_CYCLE_TIMEOUT seconds, then power_on().  Any
    other method sends a single 'CYCLE' request to the RPM client.

    @param power_method Specifies which method of power control to
                        use. By default "RPM" will be used. Valid values
                        are the strings "RPM", "manual", "servoj10".

    """
    two_step_methods = (self.POWER_CONTROL_SERVO,
                        self.POWER_CONTROL_MANUAL)
    if power_method not in two_step_methods:
        rpm_client.set_power(self.hostname, 'CYCLE')
        return

    self.power_off(power_method=power_method)
    time.sleep(self._POWER_CYCLE_TIMEOUT)
    self.power_on(power_method=power_method)
Simran Basic6f1f7a2012-10-16 10:47:46 -07001673
1674
def get_platform(self):
    """Determine the correct platform label for this host.

    The label is derived from the firmware id reported by
    crossystem.

    @returns a string representing this host's platform.
    """
    crossystem = utils.Crossystem(self)
    crossystem.init()
    # fwid generally looks like {platform}.{firmware version}, for
    # example Alex.X.YYY.Z or Google_Alex.X.YYY.Z; the text before the
    # first '.' is the platform id.
    fwid = crossystem.fwid()
    platform_id = fwid.partition('.')[0].lower()
    # Newer platforms carry a 'Google_' prefix that older ones lack.
    return platform_id.replace('google_', '')
1688
1689
@label_decorator()
def get_board(self):
    """Determine the correct board label for this host.

    Reads CHROMEOS_RELEASE_BOARD from /etc/lsb-release on the host
    and prefixes it with the board label prefix.

    @returns a string representing this host's board.
    """
    lsb_release = utils.parse_cmd_output('cat /etc/lsb-release',
                                         run_method=self.run)
    board = lsb_release['CHROMEOS_RELEASE_BOARD']
    label_format = ds_constants.BOARD_PREFIX + '%s'
    # Lab devices generally report the plain board name, but
    # development devices report {board_name}-signed-{key_type}, so
    # only the leading component is kept.  Names containing 'x86'
    # (e.g. starting with 'x86-') need their first two components.
    pieces = board.split('-')
    keep = 2 if 'x86' in board else 1
    return label_format % '-'.join(pieces[:keep])
Simran Basic6f1f7a2012-10-16 10:47:46 -07001706
1707
@label_decorator('lightsensor')
def has_lightsensor(self):
    """Determine whether this host gets the lightsensor label.

    Searches the lightsensor directory on the host for any of the
    known lightsensor file names.

    @returns the string 'lightsensor' if this host has a lightsensor or
             None if it does not.
    """
    search_dir = self._LIGHTSENSOR_SEARCH_DIR
    name_pattern = '|'.join(self._LIGHTSENSOR_FILES)
    search_cmd = "find -L %s -maxdepth 4 | egrep '%s'" % (
        search_dir, name_pattern)
    try:
        # The search follows symlinks.  stderr_tee is None because a
        # symlink loop can make find print a few messages to stderr
        # even though the command still works correctly.
        self.run(search_cmd, stdout_tee=None, stderr_tee=None)
    except error.AutoservRunError:
        # egrep exits with status 1 when none of the candidate
        # lightsensor files were found.
        return None
    return 'lightsensor'
1727
1728
@label_decorator('bluetooth')
def has_bluetooth(self):
    """Determine whether this host gets the bluetooth label.

    Checks on the host for the /sys/class/bluetooth/hci0 directory.

    @returns the string 'bluetooth' if this host has bluetooth or
             None if it does not.
    """
    try:
        self.run('test -d /sys/class/bluetooth/hci0')
    except error.AutoservRunError:
        # 'test' exited with status 1: the directory does not exist.
        return None
    # 'test' exited with status 0: the directory is there.
    return 'bluetooth'
1744
1745
@label_decorator('graphics')
def get_graphics(self):
    """
    Determine the correct graphics label for this host.

    @returns a string representing this host's graphics. For now ARM boards
    return graphics:gles while all other boards return graphics:gl. This
    may change over time, but for robustness reasons this should avoid
    executing code in actual graphics libraries (which may not be ready and
    is tested by graphics_GLAPICheck).
    """
    kernel_info = self.run('uname -a').stdout.lower()
    # Any mention of 'arm' in the uname output selects GLES.
    return 'graphics:gles' if 'arm' in kernel_info else 'graphics:gl'
1761
1762
def get_labels(self):
    """Return a list of labels for this given host.

    This is the main way to collect the automatic labels for a host:
    every function in _LABEL_FUNCTIONS is called with this host, in
    order, and each truthy result is kept.
    """
    candidates = (make_label(self) for make_label in self._LABEL_FUNCTIONS)
    return [label for label in candidates if label]