blob: 1008e0d5b2e772405622eedc58d6f234dc84048e [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Host attribute names, as stored in the database, that hold the
# servo_host and servo_port values for the servo connected to a DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
40
41
class ServoHostException(error.AutoservError):
    """Base class for every exception raised by ServoHost."""
45
46
class ServoHostVerifyFailure(ServoHostException):
    """Raised when verification of a servo host fails."""
50
51
class ServoHostRepairFailure(ServoHostException):
    """Raised when a single repair method fails to repair a servo host."""
55
56
class ServoHostRepairMethodNA(ServoHostException):
    """Raised when a repair method does not apply to the servo host."""
60
61
class ServoHostRepairTotalFailure(ServoHostException):
    """Raised when every attempt to repair a servo host has failed."""
65
66
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label of
    the DUT hostname; any remaining labels are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() yields ('', '', '') pieces for a missing dot, so a bare
    # hostname simply gets the suffix appended.
    first_label, dot, domain = dut_hostname.partition('.')
    return first_label + '-servo' + dot + domain
78
79
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone.

    The instance wraps both the machine running servod (reached over ssh,
    or locally when the hostname is 'localhost') and an XML-RPC proxy to
    the servod process itself.  It also caches the servo.Servo object
    created during verification.
    """

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Passed as `down_timeout` when rebooting the servo host.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Number of power cycles attempted before giving up on the host.
    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        After the attributes are set up, self.verify() is called.  If it
        fails and the servo is required by the test, the failure is
        re-raised for a non-lab host, while a lab host is repaired with
        repair_full().  If the servo is not required, the failure is only
        logged.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is
                          set to None, for which utils.host_is_in_lab_zone
                          will be called to check if the servo host is in
                          Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')
        remote = 'http://%s:%s' % (self.hostname, servo_port)
        self._servod_server = xmlrpclib.ServerProxy(remote)
        # Commands on the servo host must be run by the superuser. Our
        # account on Beaglebone is root, but locally we might be running as
        # a different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        try:
            self.verify()
        except Exception as e:
            if required_by_test:
                if not self.is_in_lab():
                    raise
                self.repair_full()
            else:
                # The servo is optional for this test, so the failure must
                # not propagate; record it instead of dropping it silently.
                logging.warning('Servo host %s failed verification, but '
                                'servo is not required by the test: %s',
                                self.hostname, e)


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list containing the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            # Unable to run the check at all: treat as "not enough
            # information".
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection
          every 180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote
        device, it will call `run` in SSHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if
                     necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # the remote (ssh) code path raises.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button' through the XML-RPC proxy,
        bounded by PWR_BUTTON_CMD_TIMEOUT_SECS.

        @raises ServoHostVerifyFailure if the request times out or fails
                with a socket, XML-RPC or HTTP status-line error.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not
        supported yet.  The check is skipped for localhost, and a failure is
        ignored when the host cannot be confirmed to run chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not
                exist.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            # int() is applied to each line of the pgrep output before it is
            # turned back into a string for logging.
            pids = [str(int(s)) for s in
                    self.run('pgrep servod').stdout.strip().split('\n')]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the host does not come back after the
            reboot into a completed update.

        """
        #TODO(beeps): Remove this check once all servo hosts are using chromeos.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        # Work out which build the servo host is supposed to be running.
        board = global_config.global_config.get_config_value(
                'CROS', 'servo_board')
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        target_version = afe.run('get_stable_version', board=board)
        build_pattern = global_config.global_config.get_config_value(
                'CROS', 'stable_build_pattern')
        target_build = build_pattern % (board, target_version)
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        ds = dev_server.ImageServer.resolve(self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = self.get_release_version()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': ('((reboot & sleep 10; reboot -f &) '
                               '</dev/null >/dev/null 2>&1 &)'),
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does
            # not require servo, we don't want servo failure to fail the test
            # with error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                current_build_number = self.get_release_version()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        # `status` is the value read before any reboot above; the build
        # number, however, has been refreshed after a successful reboot.
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Staging problems are deliberately non-fatal; the update is
                # simply skipped this time around.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    autotest_stats.Counter(
                            'servo_host.RootFSUpdateError').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
            1) Whether basic servo command can run successfully.
            2) Whether USB is in a good state. crbug.com/225932

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            # NOTE(review): the Servo object is cached before
            # initialize_dut() has completed, so a timeout below leaves it
            # cached and later calls only run _check_servod() -- confirm
            # this is intended.
            self._servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    self._servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key, then wait REBOOT_DELAY_SECS."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise ServoHostRepairFailure(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        The host is power cycled again each time it fails to come up within
        REBOOT_TIMEOUT_SECS, up to _MAX_POWER_CYCLE_ATTEMPTS failures.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair_full(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        Each repair function is followed by self.verify(); the first one
        that passes ends the repair.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if none has been created.
        """
        return self._servo
636
637
def create_servo_host(dut, servo_args, try_lab_servo=False):
    """Create a ServoHost object.

    The `servo_args` parameter is a dictionary specifying optional
    Servo client parameter overrides (i.e. a specific host or port).
    When specified, the caller requires that an exception be raised
    unless both the ServoHost and the Servo are successfully
    created.

    There are three possible cases:
    1. If the DUT is in the Cros test lab then the ServoHost object
       is only created for the host in the lab.  Alternate host or
       port settings in `servo_args` will be ignored.
    2. When not case 1., but `servo_args` is not `None`, then create
       a ServoHost object using `servo_args`.
    3. Otherwise, return `None`.

    When the `try_lab_servo` parameter is false, it indicates that a
    ServoHost should not be created for a device in the Cros test
    lab.  The setting of `servo_args` takes precedence over the
    setting of `try_lab_servo`.

    On Moblab the servo is never treated as a lab servo; instead, when
    the DUT's AFE host record has a servo host attribute, `servo_args`
    is replaced by the host/port stored in the host attributes.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.

    @returns: A ServoHost object or None. See comments above.

    """
    # Only assigned a real value when the DUT is named by hostname outside
    # of Moblab; it is only read when is_in_lab ends up True.
    lab_servo_hostname = None
    is_in_lab = False
    if utils.is_moblab():
        # Servos on Moblab are not in the actual lab.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            servo_args = {
                SERVO_HOST_ATTR: hosts[0].attributes[SERVO_HOST_ATTR],
                SERVO_PORT_ATTR: hosts[0].attributes.get(
                        SERVO_PORT_ATTR, 9999),
            }
    elif not dnsname_mangler.is_ip_address(dut):
        # The lab naming convention only applies to hostnames, not IPs.
        lab_servo_hostname = make_servo_hostname(dut)
        is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)

    if not is_in_lab:
        if servo_args is None:
            return None
        return ServoHost(required_by_test=True, is_in_lab=False, **servo_args)

    if servo_args is None and not try_lab_servo:
        return None

    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
    #             fixed. Autotest should not try to verify servo if servo is
    #             not required for the test.
    ping_config = ping_runner.PingConfig(
            lab_servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    logging.info('Pinging servo at %s', lab_servo_hostname)
    ping_result = ping_runner.PingRunner().ping(ping_config)
    if ping_result.received > 0:
        return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                         required_by_test=(servo_args is not None))
    return None
Dan Shibbb0cb62014-03-24 17:50:57 -0700713 return None