blob: bec28ef3b3fba1e140c314c639b39032cbb2e4ff [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Dan Shi0942b1d2015-03-31 11:07:00 -070027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Fang Deng5d518f42013-08-02 14:04:32 -070029from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080030from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070031from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070032from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070033
34
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
39
40
class ServoHostException(error.AutoservError):
    """This is the base class for exceptions raised by ServoHost."""
44
45
class ServoHostVerifyFailure(ServoHostException):
    """Raised when servo verification fails."""
49
50
class ServoHostRepairFailure(ServoHostException):
    """Raised when a repair method fails to repair a servo host."""
54
55
class ServoHostRepairMethodNA(ServoHostException):
    """Raised when a repair method is not applicable."""
59
60
class ServoHostRepairTotalFailure(ServoHostException):
    """Raised if all attempts to repair a servo host fail."""
64
65
def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo hostname is formed by appending '-servo' to the first
    dot-separated label of the DUT hostname; any remaining domain labels
    are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # Only the first label is modified, so one split is enough.
    host_parts = dut_hostname.split('.', 1)
    host_parts[0] += '-servo'
    return '.'.join(host_parts)
77
78
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')
        remote = 'http://%s:%s' % (self.hostname, servo_port)
        self._servod_server = xmlrpclib.ServerProxy(remote)
        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        try:
            self.verify()
        except Exception as e:
            if not required_by_test:
                # Best effort only: the test does not need servo, so a
                # broken servo must not fail host creation. Leave a trace
                # in the log instead of swallowing the failure silently.
                logging.warning('Ignoring verify failure of servo host %s, '
                                'servo is not required by the test: %s',
                                self.hostname, e)
            elif not self.is_in_lab():
                raise
            else:
                self.repair_full()


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list holding the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of the remote path already handle.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button' and fails if the request
        times out or the RPC raises.

        @raises ServoHostVerifyFailure if the servod request fails.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            output = self.run('pgrep %s' % self.SERVOD_PROCESS).stdout
            # int() validates that every line really is a PID; unparsable
            # output is treated the same as a failed status check.
            pids = [str(int(s)) for s in output.strip().split('\n')]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout,
                ValueError) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the host does not come back from the
            reboot into a completed update.

        """
        #TODO(beeps): Remove this check once all servo hosts are using chromeos.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        board = global_config.global_config.get_config_value(
                'CROS', 'servo_board')
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        target_version = afe.run('get_stable_version', board=board)
        build_pattern = global_config.global_config.get_config_value(
                'CROS', 'stable_build_pattern')
        target_build = build_pattern % (board, target_version)
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        ds = dev_server.ImageServer.resolve(self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = self.get_release_version()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': ('((reboot & sleep 10; reboot -f &) '
                               '</dev/null >/dev/null 2>&1 &)'),
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does not
            # require servo, we don't want servo failure to fail the test with
            # error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                # Re-read the version: the host is now on the updated image.
                current_build_number = self.get_release_version()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Staging is best effort; a later cycle will try again.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    autotest_stats.Counter(
                            'servo_host.RootFSUpdateError').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
            1) Whether basic servo command can run successfully.
            2) Whether USB is in a good state. crbug.com/225932

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            # Cache the Servo object only after it has been initialized
            # successfully. Otherwise a timed out or failed initialization
            # would leave a half-initialized object in the cache, and later
            # verify calls would skip initialization altogether.
            new_servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    new_servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
            self._servo = new_servo
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if not self.has_power():
            logging.info('Skipping power cycling, not a lab device.')
            return
        try:
            rpm_client.set_power(self.hostname, 'CYCLE')
        except (socket.error, xmlrpclib.Error,
                httplib.BadStatusLine,
                rpm_client.RemotePowerException) as e:
            raise ServoHostRepairFailure(
                    'Power cycling %s failed: %s' % (self.hostname, e))


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair_full(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no servo has been
                 successfully initialized.
        """
        return self._servo
635
636
def create_servo_host(dut, servo_args, try_lab_servo=False):
    """Create a ServoHost object.

    The `servo_args` parameter is a dictionary specifying optional
    Servo client parameter overrides (i.e. a specific host or port).
    When specified, the caller requires that an exception be raised
    unless both the ServoHost and the Servo are successfully
    created.

    There are three possible cases:
    1. If the DUT is in the Cros test lab then the ServoHost object
       is only created for the host in the lab.  Alternate host or
       port settings in `servo_args` will be ignored.
    2. When not case 1., but `servo_args` is not `None`, then create
       a ServoHost object using `servo_args`.
    3. Otherwise, return `None`.

    When the `try_lab_servo` parameter is false, it indicates that a
    ServoHost should not be created for a device in the Cros test
    lab.  The setting of `servo_args` takes precedence over the
    setting of `try_lab_servo`.

    On Moblab, the servo is never treated as a lab device; if the DUT's
    AFE host entry carries a servo host attribute, `servo_args` is
    replaced by the host (and port, default 9999) stored there.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.

    @returns: A ServoHost object or None. See comments above.

    """
    lab_servo_hostname = None
    if not utils.is_moblab():
        lab_servo_hostname = make_servo_hostname(dut)
        is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)
    else:
        # Servos on Moblab are not in the actual lab.
        is_in_lab = False
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            servo_args = {}
            servo_args[SERVO_HOST_ATTR] = hosts[0].attributes[SERVO_HOST_ATTR]
            servo_args[SERVO_PORT_ATTR] = hosts[0].attributes.get(
                    SERVO_PORT_ATTR, 9999)

    if not is_in_lab:
        if servo_args is None:
            return None
        return ServoHost(required_by_test=True, is_in_lab=False, **servo_args)

    if servo_args is None and not try_lab_servo:
        return None

    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
    #             fixed. Autotest should not try to verify servo if servo is
    #             not required for the test.
    ping_config = ping_runner.PingConfig(
            lab_servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    logging.info('Pinging servo at %s', lab_servo_hostname)
    host_is_up = ping_runner.PingRunner().ping(ping_config).received > 0
    if not host_is_up:
        return None
    return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                     required_by_test=(servo_args is not None))