blob: 0f58b05fb84538cb3ed428e6d2cea783ffc5746d [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
21from autotest_lib.client.common_lib.cros import autoupdater
22from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070023from autotest_lib.client.common_lib.cros import retry
Michael Liangda8c60a2014-06-03 13:24:51 -070024from autotest_lib.client.common_lib.cros.graphite import stats
Christopher Wileycef1f902014-06-19 11:11:23 -070025from autotest_lib.client.common_lib.cros.network import ping_runner
beeps5e8c45a2013-12-17 22:05:11 -080026from autotest_lib.server import site_utils as server_site_utils
Fang Deng5d518f42013-08-02 14:04:32 -070027from autotest_lib.server.cros.servo import servo
28from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070029from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070030
31
class ServoHostException(error.AutoservError):
    """This is the base class for exceptions raised by ServoHost."""
35
36
class ServoHostVerifyFailure(ServoHostException):
    """Raised when servo verification fails."""
40
41
class ServoHostRepairFailure(ServoHostException):
    """Raised when a repair method fails to repair a servo host."""
45
46
class ServoHostRepairMethodNA(ServoHostException):
    """Raised when a repair method is not applicable."""
50
51
class ServoHostRepairTotalFailure(ServoHostException):
    """Raised if all attempts to repair a servo host fail."""
55
56
def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo hostname is formed by appending '-servo' to the first
    dot-separated component of the DUT hostname; any remaining components
    are kept unchanged.  A hostname without a dot simply gets '-servo'
    appended.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() keeps the separator, so a name with no '.' yields an
    # empty separator and tail and nothing extra is appended.
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
68
69
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Passed as `down_timeout` when rebooting the servo host via SysRq.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Maximum number of power cycles attempted by _powercycle_to_repair.
    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        After setting up its attributes, the host is verified.  If the
        verification fails and the servo is required by the test, the
        failure is re-raised for non-lab hosts and a full repair is
        attempted for lab hosts.  If the servo is not required, the
        failure is logged and ignored.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')
        remote = 'http://%s:%s' % (self.hostname, servo_port)
        self._servod_server = xmlrpclib.ServerProxy(remote)
        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        try:
            self.verify()
        except Exception as e:
            if not required_by_test:
                # Best effort only: the test does not need the servo, so
                # record the failure instead of silently dropping it.
                logging.warning('Ignoring verify failure on servo host %s, '
                                'servo is not required by the test: %s',
                                self.hostname, e)
            elif not self.is_in_lab():
                raise
            else:
                self.repair_full()


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list containing the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when superuser rights are needed.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if not self.is_localhost():
            # The ssh-specific arguments only make sense for a remote host.
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)

        if self._sudo_required:
            run_args['command'] = 'sudo -n %s' % command
        try:
            return utils.run(**run_args)
        except error.CmdError as e:
            logging.error(e)
            # Surface local failures with the same exception type that the
            # remote (ssh) code path raises.
            raise error.AutoservRunError('command execution error',
                                         e.result_obj)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Requests the value of 'pwr_button' from servod, bounded by
        PWR_BUTTON_CMD_TIMEOUT_SECS.

        @raises ServoHostVerifyFailure if the request times out or fails
                with a socket, xmlrpc or http status line error.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_host_usb(self):
        """A sanity check of the USB device.

        Test that the USB stick has been properly unplugged.  An old
        kernel bug sometimes allowed the USB stick block device node
        to be wedged such that it couldn't be unplugged.

        Servo initialization unplugs the stick, so as a prophylactic
        against a regression, we check that the USB stick is
        actually unplugged.  (For reference, see crbug.com/225932.)

        @raises ServoHostVerifyFailure if /dev/sda exists

        """
        try:
            # The following test exits with a non-zero code
            # and raises AutoservRunError if error is detected.
            self.run('test ! -b /dev/sda')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            raise ServoHostVerifyFailure(
                    'USB sanity check on %s failed: %s' % (self.hostname, e))


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet.  The failure is ignored when the host is not known to be
        running chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.

        """
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            pgrep_output = self.run('pgrep %s' % self.SERVOD_PROCESS).stdout
            pids = [str(int(s)) for s in pgrep_output.strip().split('\n')]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self.is_localhost():
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method does nothing for servo hosts that are not running chromeos.
        If the host is running chromeos, and a newer image is available on the
        devserver, trigger a download and apply it in the background. If an
        update has already been downloaded and applied, reboot the servo host
        into the new image. If update_engine_client is in the process of
        applying an update that was triggered on a previous invocation, do
        nothing.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the host does not come back after
            being rebooted into a new image.
        """
        #TODO(beeps): Remove this check once all servo hosts are using chromeos.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        update_branch = global_config.global_config.get_config_value(
                'CROS', 'servo_builder')
        ds = dev_server.ImageServer.resolve(self.hostname)
        latest_build = ds.get_latest_build_in_server(target=update_branch)

        # We might have just purged all the beaglebone builds on the devserver
        # after having triggered a download the last time we verified this
        # beaglebone, so we still need to reboot if necessary.
        if latest_build is None:
            url = ds.url()
            logging.debug('Could not find any builds for %s on %s',
                          update_branch, url)
            latest_build_number = None
        else:
            latest_build = '%s/%s' % (update_branch, latest_build)
            latest_build_number = server_site_utils.ParseBuildName(
                    latest_build)[3]
            url = ds.get_update_url(latest_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = updater.get_build_id()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': ('((reboot & sleep 10; reboot -f &) '
                               '</dev/null >/dev/null 2>&1 &)'),
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does not
            # require servo, we don't want servo failure to fail the test with
            # error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                current_build_number = updater.get_build_id()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif (latest_build_number and
              current_build_number != latest_build_number):
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, latest_build_number)
            try:
                updater.trigger_update()
            except autoupdater.RootFSUpdateError as e:
                # A failed trigger is recorded but does not fail verification.
                trigger_download_status = 'failed with %s' % str(e)
                stats.Counter('servo_host.RootFSUpdateError').increment()
            else:
                trigger_download_status = 'passed'
            logging.info('Triggered download and update %s for %s, '
                         'update engine currently in status %s',
                         trigger_download_status, self.hostname,
                         updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
          1) Whether the servod config file exists.
          2) Whether the servod process is running.
          3) Whether basic servo command can run successfully (or, if no
             Servo object is cached yet, whether servo can be initialized).
          4) Whether USB is in a good state. crbug.com/225932

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            self._servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    self._servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')

        self._check_servo_host_usb()
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if not self.has_power():
            logging.info('Skipping power cycling, not a lab device.')
            return
        try:
            rpm_client.set_power(self.hostname, 'CYCLE')
        except (socket.error, xmlrpclib.Error,
                httplib.BadStatusLine,
                rpm_client.RemotePowerException) as e:
            raise ServoHostRepairFailure(
                    'Power cycling %s failed: %s' % (self.hostname, e))


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair_full(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        Each repair method is followed by a call to verify(); the first
        method after which verification passes ends the repair.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        stats.Counter('servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no Servo object has been
                 created yet.
        """
        return self._servo
633
634
def create_servo_host(dut, servo_args):
    """Create a ServoHost object.

    There are three possible cases:
    1) If the DUT is in Cros Lab and has a beaglebone and a servo, then
       create a ServoHost object pointing to the beaglebone. servo_args
       is ignored, except that the servo is treated as required by the
       test only when servo_args is not None. If the beaglebone does not
       respond to ping, None is returned.
    2) If not case 1) and servo_args is neither None nor empty, then
       create a ServoHost object using servo_args.
    3) If neither case 1) or 2) applies, return None.
    When the servo_args is not None, we assume the servo is required by the
    test. If servo failed to be verified, we will attempt to repair it. If servo
    is not required, we will initialize ServoHost without initializing a servo
    object.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.

    @returns: A ServoHost object or None. See comments above.

    """
    lab_servo_hostname = make_servo_hostname(dut)
    is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)

    if is_in_lab:
        # Technically, this duplicates the SSH ping done early in the servo
        # proxy initialization code.  However, this ping ends in a couple
        # seconds when it fails, rather than the 60 seconds it takes to decide
        # that an SSH ping has timed out.  Specifically, that timeout happens
        # when our servo DNS name resolves, but there is no host at that IP.
        # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
        #             fixed. Autotest should not try to verify servo if servo is
        #             not required for the test.
        ping_config = ping_runner.PingConfig(
                lab_servo_hostname, count=3,
                ignore_result=True, ignore_status=True)
        logging.info('Pinging servo at %s', lab_servo_hostname)
        host_is_up = ping_runner.PingRunner().ping(ping_config).received > 0
        if host_is_up:
            return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                             required_by_test=(servo_args is not None))
        # The lab servo host did not answer any ping: no ServoHost is
        # created, and servo_args is not consulted for lab devices.
        logging.warning('Servo host %s did not respond to ping, no servo '
                        'host will be created.', lab_servo_hostname)
        return None

    if servo_args is not None:
        return ServoHost(required_by_test=True, is_in_lab=False, **servo_args)
    return None