# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""
10
11
import httplib
import logging
import socket
import time
import xmlrpclib

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib.cros import autoupdater
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.server import site_utils as server_site_utils
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import ssh_host
from autotest_lib.site_utils.rpm_control_system import rpm_client


# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
37
38
class ServoHostException(error.AutoservError):
    """This is the base class for exceptions raised by ServoHost."""
42
43
class ServoHostVerifyFailure(ServoHostException):
    """Raised when servo verification fails."""
47
48
class ServoHostRepairFailure(ServoHostException):
    """Raised when a repair method fails to repair a servo host."""
52
53
class ServoHostRepairMethodNA(ServoHostException):
    """Raised when a repair method is not applicable."""
57
58
class ServoHostRepairTotalFailure(ServoHostException):
    """Raised if all attempts to repair a servo host fail."""
62
63
def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo name is formed by appending '-servo' to the first
    dot-separated label of the DUT name; any remaining labels (the
    domain part) are kept unchanged, e.g. 'host1.cros' becomes
    'host1-servo.cros'.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() leaves `dot` and `domain` empty when there is no domain
    # part, so unqualified names simply get the suffix appended.
    name, dot, domain = dut_hostname.partition('.')
    return '%s-servo%s%s' % (name, dot, domain)
75
76
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        After the attributes are set up the host is verified. If the
        verification fails and the servo is required by the test, a lab
        host is repaired while a non-lab host re-raises the failure. If
        the servo is not required, the failure is logged and ignored.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')
        remote = 'http://%s:%s' % (self.hostname, servo_port)
        self._servod_server = xmlrpclib.ServerProxy(remote)
        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        try:
            self.verify()
        except Exception as e:
            if not required_by_test:
                # Best effort only: the test can run without servo, so do
                # not fail it, but leave a trace of what went wrong.
                logging.warning('Servo host %s failed verification; ignoring '
                                'since servo is not required by the test: %s',
                                self.hostname, e)
            elif not self.is_in_lab():
                raise
            else:
                self.repair_full()


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list holding the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of the remote code path already handle.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Queries the 'pwr_button' control through the servod proxy.

        @raises ServoHostVerifyFailure if the request times out or the
                RPC fails.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet. The check is skipped for localhost, and a failure is ignored
        when the host cannot be confirmed to run chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            # int() validates that each line of pgrep output is a PID.
            pids = [str(int(s)) for s in
                    self.run('pgrep %s' % self.SERVOD_PROCESS
                             ).stdout.strip().split('\n')]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method does nothing for servo hosts that are not running chromeos.
        If the host is running chromeos, and a newer image is available on the
        devserver, trigger a download and apply it in the background. If an
        update has already been downloaded and applied, reboot the servo host
        into the new image. If update_engine_client is in the process of
        applying an update that was triggered on a previous invocation, do
        nothing.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the host does not come back after the
            reboot into a freshly applied update.
        """
        #TODO(beeps): Remove this check once all servo hosts are using chromeos.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        update_branch = global_config.global_config.get_config_value(
                'CROS', 'servo_builder')
        ds = dev_server.ImageServer.resolve(self.hostname)
        latest_build = ds.get_latest_build_in_server(target=update_branch)

        # We might have just purged all the beaglebone builds on the devserver
        # after having triggered a download the last time we verified this
        # beaglebone, so we still need to reboot if necessary.
        if latest_build is None:
            logging.debug('Could not find any builds for %s on %s',
                          update_branch, ds.url())
            url = ds.url()
            latest_build_number = None
        else:
            latest_build = '%s/%s' % (update_branch, latest_build)
            latest_build_number = server_site_utils.ParseBuildName(
                    latest_build)[3]
            url = ds.get_update_url(latest_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = updater.get_build_id()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': ('((reboot & sleep 10; reboot -f &) '
                               '</dev/null >/dev/null 2>&1 &)'),
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does not
            # require servo, we don't want servo failure to fail the test with
            # error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                current_build_number = updater.get_build_id()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif (latest_build_number and
              current_build_number != latest_build_number):
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, latest_build_number)
            try:
                updater.trigger_update()
            except autoupdater.RootFSUpdateError as e:
                trigger_download_status = 'failed with %s' % str(e)
                autotest_stats.Counter(
                        'servo_host.RootFSUpdateError').increment()
            else:
                trigger_download_status = 'passed'
            logging.info('Triggered download and update %s for %s, '
                         'update engine currently in status %s',
                         trigger_download_status, self.hostname,
                         updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
            1) Whether basic servo command can run successfully.
            2) Whether USB is in a good state. crbug.com/225932

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            # Only cache the Servo object once its signals were initialized
            # successfully. Caching it up front would make a later verify()
            # skip initialization and let get_servo() hand out an object
            # whose initialization timed out or raised.
            new_servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    new_servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
            self._servo = new_servo
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise ServoHostRepairFailure(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair_full(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no servo has been
                 successfully initialized.
        """
        return self._servo
619
620
def create_servo_host(dut, servo_args, try_lab_servo=False):
    """Create a ServoHost object.

    The `servo_args` parameter is a dictionary specifying optional
    Servo client parameter overrides (i.e. a specific host or port).
    When specified, the caller requires that an exception be raised
    unless both the ServoHost and the Servo are successfully
    created.

    There are three possible cases:
    1. If the DUT is in the Cros test lab then the ServoHost object
       is only created for the host in the lab.  Alternate host or
       port settings in `servo_args` will be ignored.
    2. When not case 1., but `servo_args` is not `None`, then create
       a ServoHost object using `servo_args`.
    3. Otherwise, return `None`.

    When the `try_lab_servo` parameter is false, it indicates that a
    ServoHost should not be created for a device in the Cros test
    lab.  The setting of `servo_args` takes precedence over the
    setting of `try_lab_servo`.

    On Moblab the servo is never treated as a lab device; if the DUT's
    AFE host entry has a servo host attribute, `servo_args` is replaced
    by the host/port stored in the host attributes.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.

    @returns: A ServoHost object or None. See comments above.

    """
    # Always bind the lab servo name so no code path can reach the ping
    # below with the name undefined.
    lab_servo_hostname = make_servo_hostname(dut)
    if not utils.is_moblab():
        is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)
    else:
        # Servos on Moblab are not in the actual lab.
        is_in_lab = False
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            servo_args = {}
            servo_args[SERVO_HOST_ATTR] = hosts[0].attributes[SERVO_HOST_ATTR]
            servo_args[SERVO_PORT_ATTR] = hosts[0].attributes.get(
                    SERVO_PORT_ATTR, 9999)

    if not is_in_lab:
        if servo_args is None:
            return None
        return ServoHost(required_by_test=True, is_in_lab=False, **servo_args)

    if servo_args is None and not try_lab_servo:
        return None

    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
    #             fixed. Autotest should not try to verify servo if servo is
    #             not required for the test.
    ping_config = ping_runner.PingConfig(
            lab_servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    logging.info('Pinging servo at %s', lab_servo_hostname)
    host_is_up = ping_runner.PingRunner().ping(ping_config).received > 0
    if not host_is_up:
        return None
    return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                     required_by_test=(servo_args is not None))
Dan Shibbb0cb62014-03-24 17:50:57 -0700692 return None