blob: 4100dbfb52776cc9f5e444c65fa424f8341a397f [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
# create_servo_host() reads these keys from the AFE host attributes and uses
# them as keyword names when constructing a ServoHost.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Module-wide shorthand for the global config object; used below to look up
# values in the 'CROS' and 'SSP' sections.
_CONFIG = global_config.global_config
Simran Basi0739d682015-02-25 16:22:56 -080042
class ServoHostException(error.AutoservError):
    """Common base class of every exception raised by ServoHost."""
46
47
class ServoHostVerifyFailure(ServoHostException):
    """Signals that verification of the servo host did not pass."""
51
52
class ServoHostRepairFailure(ServoHostException):
    """Signals that a single repair method could not fix the servo host."""
56
57
class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""
61
62
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempted repair of the servo host failed."""
66
67
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that belongs to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label of
    the name; any remaining labels (the domain part) are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() yields an empty separator and tail when there is no dot,
    # so a bare host name simply gains the '-servo' suffix.
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
79
80
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Passed as `down_timeout` to reboot() in the SysRq repair.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Upper bound on PoE power cycles tried by _powercycle_to_repair().
    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        After the attributes are set up the host is verified. If that
        fails and the servo is required by the test, the failure is
        re-raised for a non-lab host, while a lab host is repaired via
        repair_full(). If the servo is not required, the failure is only
        logged.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')
        remote = 'http://%s:%s' % (self.hostname, servo_port)
        self._servod_server = xmlrpclib.ServerProxy(remote)
        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        try:
            self.verify()
        except Exception as e:
            if required_by_test:
                if not self.is_in_lab():
                    raise
                else:
                    self.repair_full()
            else:
                # The test does not need the servo, so a broken servo host
                # must not fail it; leave a trace instead of dropping the
                # error silently.
                logging.warning('Ignoring verify failure of servo host %s, '
                                'servo is not required by the test: %s',
                                self.hostname, e)


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: The xmlrpclib.ServerProxy created in _initialize for the
                  servod URL of this host.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list holding the servod process name.

        """
        processes = [self.SERVOD_PROCESS]
        return processes


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run locally,
        prefixing the command with `sudo -n` when the current user is not
        root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of a remote run() expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button' within
        PWR_BUTTON_CMD_TIMEOUT_SECS.

        @raises ServoHostVerifyFailure if the request times out or fails
                with a socket, XML-RPC or HTTP status-line error.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet. The check is skipped for localhost, and a failure is ignored
        when the host cannot be confirmed to run chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            pids = [str(int(s)) for s in
                    self.run('pgrep servod').stdout.strip().split('\n')]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the host does not come back from the
            reboot into an already downloaded update.

        """
        #TODO(beeps): Remove this check once all servo hosts are using chromeos.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        board = _CONFIG.get_config_value(
                'CROS', 'servo_board')
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        target_version = afe.run('get_stable_version', board=board)
        build_pattern = _CONFIG.get_config_value(
                'CROS', 'stable_build_pattern')
        target_build = build_pattern % (board, target_version)
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        ds = dev_server.ImageServer.resolve(self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = self.get_release_version()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': ('((reboot & sleep 10; reboot -f &) '
                               '</dev/null >/dev/null 2>&1 &)'),
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does not
            # require servo, we don't want servo failure to fail the test with
            # error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                current_build_number = self.get_release_version()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        # `status` still holds the value read before any reboot above; the
        # build number comparison uses the post-reboot version.
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Staging is best effort; skip the update for this cycle.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    autotest_stats.Counter(
                            'servo_host.RootFSUpdateError').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
            1) Whether basic servo command can run successfully.
            2) Whether USB is in a good state. crbug.com/225932

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            self._servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    self._servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        # Give servod time to start before the caller verifies the host.
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise ServoHostRepairFailure(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        The host is power cycled again whenever it fails to come up within
        REBOOT_TIMEOUT_SECS, up to _MAX_POWER_CYCLE_ATTEMPTS failures.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair_full(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                # Any other failure counts against this method; move on to
                # the next repair method, if there is one.
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        full_failure_counter = autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed')
        full_failure_counter.increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no servo has been
                 initialized (the cache is reset to None in _initialize and
                 repair_full).
        """
        return self._servo
637
638
def create_servo_host(dut, servo_args, try_lab_servo=False):
    """Create a ServoHost object.

    The `servo_args` parameter is a dictionary specifying optional
    Servo client parameter overrides (i.e. a specific host or port).
    When specified, the caller requires that an exception be raised
    unless both the ServoHost and the Servo are successfully
    created.

    There are three possible cases:
    1. If the DUT is in the Cros test lab then the ServoHost object
       is only created for the host in the lab.  Alternate host or
       port settings in `servo_args` will be ignored.
    2. When not case 1., but `servo_args` is not `None`, then create
       a ServoHost object using `servo_args`.
    3. Otherwise, return `None`.

    When the `try_lab_servo` parameter is false, it indicates that a
    ServoHost should not be created for a device in the Cros test
    lab.  The setting of `servo_args` takes precedence over the
    setting of `try_lab_servo`.

    On Moblab the servo is never treated as a lab device; when the AFE
    host entry carries a servo host attribute, the values from the AFE
    replace the contents of `servo_args`.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.

    @returns: A ServoHost object or None. See comments above.

    """
    # Must be computed before servo_args can be replaced by AFE attributes.
    required_by_test = servo_args is not None

    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
    else:
        is_moblab = utils.is_moblab()

    # Only a non-Moblab DUT addressed by name can have a lab servo.
    is_in_lab = False
    if is_moblab:
        # Servos on Moblab are not in the actual lab.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            attributes = hosts[0].attributes
            servo_args = {
                SERVO_HOST_ATTR: attributes[SERVO_HOST_ATTR],
                SERVO_PORT_ATTR: attributes.get(SERVO_PORT_ATTR, 9999),
            }
            # Inside a container a loopback address has to be replaced by
            # the configured host container IP.
            if (utils.is_in_container() and
                    servo_args[SERVO_HOST_ATTR] in ['localhost', '127.0.0.1']):
                servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
                        'SSP', 'host_container_ip', type=str, default=None)
    elif not dnsname_mangler.is_ip_address(dut):
        lab_servo_hostname = make_servo_hostname(dut)
        is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)

    if not is_in_lab:
        if not required_by_test:
            return None
        return ServoHost(required_by_test=True, is_in_lab=False, **servo_args)

    if servo_args is None and not try_lab_servo:
        return None

    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
    #             fixed. Autotest should not try to verify servo if servo is
    #             not required for the test.
    ping_config = ping_runner.PingConfig(
            lab_servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    logging.info('Pinging servo at %s', lab_servo_hostname)
    host_is_up = ping_runner.PingRunner().ping(ping_config).received > 0
    if not host_is_up:
        return None
    return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                     required_by_test=required_by_test)