blob: d2409440029990682c59545460a4852a16ebcc75 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Host attribute keys, as stored in the database, that describe the servo
# attached to a DUT: the servo host's name and the servod port on it.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Shortcut to the global configuration object used throughout this module.
_CONFIG = global_config.global_config
Simran Basi0739d682015-02-25 16:22:56 -080042
class ServoHostException(error.AutoservError):
    """Common base class for every exception raised by ServoHost."""
46
47
class ServoHostVerifyFailure(ServoHostException):
    """Signals that verification of a servo host did not pass."""
51
52
class ServoHostRepairFailure(ServoHostException):
    """Signals that one repair method could not repair a servo host."""
56
57
class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""
61
62
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempt to repair a servo host has failed."""
66
67
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label of
    the DUT hostname; any domain part is kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() yields an empty separator and tail when there is no
    # dot, so a bare host name simply gets the suffix appended.
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
79
80
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone.

    The host runs a servod process that is reached over XML-RPC. Commands
    are run over ssh for a remote servo host, or locally (with sudo when
    not already root) when the servo host is 'localhost'.
    """

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        After all attributes are set the host is verified. If verification
        fails and the servo is required by the test, a lab host is
        repaired and for a non-lab host the failure is re-raised. If the
        servo is not required, the failure is logged and ignored.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')
        remote = 'http://%s:%s' % (self.hostname, servo_port)
        self._servod_server = xmlrpclib.ServerProxy(remote)
        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Cache of the Servo object; populated by verify_software().
        self._servo = None
        self.required_by_test = required_by_test
        # verify() must be called at the end of _initialize to make sure
        # all attributes are set.
        try:
            self.verify()
        except Exception as e:
            if not required_by_test:
                # Best effort only: a broken servo must not fail a test
                # that does not need it, but leave a trace in the logs.
                logging.info('Ignoring verify failure on servo host %s, '
                             'servo is not required by the test: %s',
                             self.hostname, e)
            elif not self.is_in_lab():
                raise
            else:
                self.repair_full()


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: The xmlrpclib.ServerProxy created in _initialize for the
                  servod server on the host.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list holding the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure.  Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate into the exception type callers of a host's
                # run() are documented to expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button' with a bounded timeout.

        @raises ServoHostVerifyFailure if the request times out or fails
                with a socket, XML-RPC or HTTP status line error.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet. The check is skipped for localhost, and a failure is ignored
        when the host does not appear to run chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            # Use the class constant so the process name is defined in a
            # single place (it is also used by get_wait_up_processes).
            pgrep_output = self.run(
                    'pgrep %s' % self.SERVOD_PROCESS).stdout.strip()
            pids = [str(int(s)) for s in pgrep_output.split('\n')]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the host does not come back after the
            reboot into a completed update.

        """
        # servod could be running in a Ubuntu workstation.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        board = _CONFIG.get_config_value(
                'CROS', 'servo_board')
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        target_version = afe.run('get_stable_version', board=board)
        build_pattern = _CONFIG.get_config_value(
                'CROS', 'stable_build_pattern')
        target_build = build_pattern % (board, target_version)
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        ds = dev_server.ImageServer.resolve(self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = self.get_release_version()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': 'sleep 1 ; reboot & sleep 10; reboot -f',
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does not
            # require servo, we don't want servo failure to fail the test with
            # error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                current_build_number = self.get_release_version()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Deliberately best effort: a staging failure only skips
                # the update for this cycle.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    autotest_stats.Counter(
                            'servo_host.RootFSUpdateError').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
            1) Whether basic servo command can run successfully.
            2) Whether USB is in a good state. crbug.com/225932

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            self._servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    self._servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise ServoHostRepairFailure(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        The host is power cycled again each time it fails to come up,
        up to _MAX_POWER_CYCLE_ATTEMPTS times.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair_full(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped. Each repair method is followed by verify(); the
        first one that verifies cleanly ends the repair.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no servo has been
                 initialized yet or the cache was reset by repair_full.
        """
        return self._servo
642
643
def create_servo_host(dut, servo_args, try_lab_servo=False):
    """Build the ServoHost for a DUT, or return None when none applies.

    `servo_args` is a dictionary of optional Servo client overrides
    (i.e. a specific host or port). Passing a dictionary means the
    caller requires the servo: an exception is to be raised unless
    both the ServoHost and the Servo are created successfully.

    The outcome is one of:
      1. The DUT is in the Cros test lab: a ServoHost is created only
         for the lab servo host derived from the DUT's name. Any host
         or port given in `servo_args` is ignored.
      2. Not case 1, and `servo_args` is not `None`: a ServoHost is
         created from `servo_args`.
      3. Otherwise `None` is returned.

    A false `try_lab_servo` means no ServoHost should be created for a
    lab device; a non-`None` `servo_args` overrides that.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.

    @returns: A ServoHost object or None. See comments above.

    """
    # Decided before the moblab lookup below may replace servo_args.
    required_by_test = servo_args is not None

    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
    else:
        is_moblab = utils.is_moblab()

    # Servos on Moblab, and servos of DUTs addressed by IP, are never
    # treated as lab devices.
    is_in_lab = False
    if is_moblab:
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            # The host attributes stored in the AFE replace whatever
            # the caller passed in.
            servo_args = {
                SERVO_HOST_ATTR: hosts[0].attributes[SERVO_HOST_ATTR],
                SERVO_PORT_ATTR: hosts[0].attributes.get(
                        SERVO_PORT_ATTR, 9999),
            }
            if (utils.is_in_container() and
                servo_args[SERVO_HOST_ATTR] in ['localhost', '127.0.0.1']):
                # Inside a container a loopback address is swapped for
                # the configured host container IP.
                servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
                        'SSP', 'host_container_ip', type=str, default=None)
    elif not dnsname_mangler.is_ip_address(dut):
        lab_servo_hostname = make_servo_hostname(dut)
        is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)

    if not is_in_lab:
        if required_by_test:
            return ServoHost(required_by_test=True, is_in_lab=False,
                             **servo_args)
        return None

    if servo_args is None and not try_lab_servo:
        return None

    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when if fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
    #             fixed. Autotest should not try to verify servo if servo is
    #             not required for the test.
    ping_config = ping_runner.PingConfig(
            lab_servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    logging.info('Pinging servo at %s', lab_servo_hostname)
    ping_result = ping_runner.PingRunner().ping(ping_config)
    if ping_result.received > 0:
        return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                         required_by_test=required_by_test)
    return None