blob: 603b49921635e22395ab273affe176e4d984e64f [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
21from autotest_lib.client.common_lib.cros import autoupdater
22from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070023from autotest_lib.client.common_lib.cros import retry
Michael Liangda8c60a2014-06-03 13:24:51 -070024from autotest_lib.client.common_lib.cros.graphite import stats
Christopher Wileycef1f902014-06-19 11:11:23 -070025from autotest_lib.client.common_lib.cros.network import ping_runner
beeps5e8c45a2013-12-17 22:05:11 -080026from autotest_lib.server import site_utils as server_site_utils
Fang Deng5d518f42013-08-02 14:04:32 -070027from autotest_lib.server.cros.servo import servo
28from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070029from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070030
31
class ServoHostException(error.AutoservError):
    """Common ancestor of every exception raised by ServoHost."""
35
36
class ServoHostVerifyFailure(ServoHostException):
    """Signals that a servo host did not pass verification."""
40
41
class ServoHostRepairFailure(ServoHostException):
    """Signals that one repair method could not fix the servo host."""
45
46
class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""
50
51
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempted repair of a servo host failed."""
55
56
def make_servo_hostname(dut_hostname):
    """Derive the servo host name that corresponds to a DUT host name.

    The suffix '-servo' is appended to the first dot-separated label of the
    name; any remaining labels are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() splits at the first dot only, which is exactly the label
    # that needs the suffix; separator and remainder are empty when the name
    # contains no dot.
    first_label, separator, remainder = dut_hostname.partition('.')
    return first_label + '-servo' + separator + remainder
68
69
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Passed as down_timeout when rebooting the servo host.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'

    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        Initialization ends by calling verify(). If verification fails and
        the servo is required by the test, a lab servo host is repaired via
        repair_full() while a non-lab servo host re-raises the failure. If
        the servo is not required, the failure is logged and ignored.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')
        remote = 'http://%s:%s' % (self.hostname, servo_port)
        self._servod_server = xmlrpclib.ServerProxy(remote)
        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        try:
            self.verify()
        except Exception as e:
            if not required_by_test:
                # Best effort only: a broken servo must not fail a test that
                # does not need it, but leave a trace instead of silently
                # dropping the failure.
                logging.warning('Ignoring verification failure of servo host '
                                '%s, servo is not required by the test: %s',
                                self.hostname, e)
            elif self.is_in_lab():
                self.repair_full()
            else:
                raise


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list holding the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the current user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                # NOTE(review): the prefix presumably elevates only the first
                # command of a compound shell line (pipes, redirects, `;`) --
                # confirm that localhost callers do not rely on more.
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                logging.error(e)
                # Translate into the exception type remote runs raise so
                # callers handle both cases the same way.
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button' and treats a timeout or a
        socket/XML-RPC/HTTP status error as a failure.

        @raises ServoHostVerifyFailure if the request times out or fails.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timed_out, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timed_out:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_host_usb(self):
        """A sanity check of the USB device.

        Sometimes the usb gets wedged due to a kernel bug on the beaglebone.
        A symptom is the presence of /dev/sda without /dev/sda1. The check
        here ensures that if /dev/sda exists, /dev/sda1 must also exist.
        See crbug.com/225932.

        @raises ServoHostVerifyFailure if /dev/sda exists without /dev/sda1 on
                the beaglebone.

        """
        try:
            # The following test exits with a non-zero code
            # and raises AutoservRunError if error is detected.
            self.run('test ! -b /dev/sda -o -b /dev/sda1')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            raise ServoHostVerifyFailure(
                    'USB sanity check on %s failed: %s' % (self.hostname, e))


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet.

        A failed check is ignored (and only logged) when the servo host is
        not known to be running chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist
                on a servo host that is running chromeos.

        """
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method does nothing for servo hosts that are not running chromeos.
        If the host is running chromeos, and a newer image is available on the
        devserver, trigger a download and apply it in the background. If an
        update has already been downloaded and applied, reboot the servo host
        into the new image. If update_engine_client is in the process of
        applying an update that was triggered on a previous invocation, do
        nothing.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the servo host does not come back after
            being rebooted into an already applied update.
        """
        #TODO(beeps): Remove this check once all servo hosts are using chromeos.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        update_branch = global_config.global_config.get_config_value(
                'CROS', 'servo_builder')
        ds = dev_server.ImageServer.resolve(self.hostname)
        latest_build = ds.get_latest_build_in_server(target=update_branch)

        # We might have just purged all the beaglebone builds on the devserver
        # after having triggered a download the last time we verified this
        # beaglebone, so we still need to reboot if necessary.
        if latest_build is None:
            logging.debug('Could not find any builds for %s on %s',
                          update_branch, ds.url())
            url = ds.url()
            latest_build_number = None
        else:
            latest_build = '%s/%s' % (update_branch, latest_build)
            latest_build_number = server_site_utils.ParseBuildName(
                    latest_build)[3]
            url = ds.get_update_url(latest_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = updater.get_build_id()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': ('((reboot & sleep 10; reboot -f &) '
                               '</dev/null >/dev/null 2>&1 &)'),
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does not
            # require servo, we don't want servo failure to fail the test with
            # error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                current_build_number = updater.get_build_id()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        # `status` is not re-read after a reboot; only the build number is.
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif (latest_build_number and
              current_build_number != latest_build_number):
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, latest_build_number)
            try:
                updater.trigger_update()
            except autoupdater.RootFSUpdateError as e:
                # A failed trigger is recorded but does not fail verification.
                trigger_download_status = 'failed with %s' % str(e)
                stats.Counter('servo_host.RootFSUpdateError').increment()
            else:
                trigger_download_status = 'passed'
            logging.info('Triggered download and update %s for %s, '
                         'update engine currently in status %s',
                         trigger_download_status, self.hostname,
                         updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
        1) Whether the servod config file exists.
        2) Whether USB is in a good state. crbug.com/225932
        3) Whether basic servo command can run successfully; when no Servo
           object is cached yet, one is created instead.

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()

        logging.info('Verifying if servo config file exists.')
        self._check_servo_config()

        logging.info('Verifying servo host %s with sanity checks.',
                     self.hostname)
        self._check_servo_host_usb()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            self._servo = servo.Servo(servo_host=self)

        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key.

        Sleeps REBOOT_DELAY_SECS after the reboot call returns.

        """
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise ServoHostRepairFailure(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        # Keep cycling until the host answers or the attempt budget is spent.
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair_full(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                # A repair only counts once the host verifies again.
                self.verify()
                stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        stats.Counter('servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no servo has been
                 initialized.
        """
        return self._servo
601
602
def create_servo_host(dut, servo_args):
    """Create a ServoHost object.

    There are three possible cases:
    1) If the servo hostname derived from the DUT is in Cros Lab and answers
       a ping, create a ServoHost object pointing to it. The contents of
       servo_args are ignored; only whether it is None is used, to decide
       if the servo is required by the test.
    2) If the derived servo hostname is not in Cros Lab and servo_args is not
       None, create a ServoHost object using servo_args. An empty dictionary
       selects the defaults of ServoHost.
    3) Otherwise return None. This includes a lab servo host that does not
       answer the ping, even when servo_args is given.
    When the servo_args is not None, we assume the servo is required by the
    test. If servo failed to be verified, we will attempt to repair it. If servo
    is not required, we will initialize ServoHost without initializing a servo
    object.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.

    @returns: A ServoHost object or None. See comments above.

    """
    lab_servo_hostname = make_servo_hostname(dut)
    is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)

    if not is_in_lab:
        if servo_args is None:
            return None
        return ServoHost(required_by_test=True, is_in_lab=False, **servo_args)

    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code. However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out. Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
    #             fixed. Autotest should not try to verify servo if servo is
    #             not required for the test.
    ping_config = ping_runner.PingConfig(
            lab_servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    logging.info('Pinging servo at %s', lab_servo_hostname)
    host_is_up = ping_runner.PingRunner().ping(ping_config).received > 0
    if not host_is_up:
        # Unreachable lab servo: no ServoHost is created, regardless of
        # servo_args.
        return None
    return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                     required_by_test=(servo_args is not None))