blob: 87dc357491750c7d29522fc237ad164e8cf8ea52 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Keys of the host attributes in the database that carry, respectively,
# the servo host name and the servo port of the servo connected to a DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Shared handle on the global configuration, used throughout this module.
_CONFIG = global_config.global_config
# Read once at import time from the CROS section; defaults to False when
# the option is absent. When true, ServoHost reaches servod through
# rpc_server_tracker.xmlrpc_connect instead of a direct ServerProxy.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)
Simran Basi0739d682015-02-25 16:22:56 -080044
class ServoHostException(error.AutoservError):
    """Common base class of every exception raised by ServoHost."""
48
49
class ServoHostVerifyFailure(ServoHostException):
    """Signals that verification of a servo host failed."""
53
54
class ServoHostRepairFailure(ServoHostException):
    """Signals that one repair method could not repair the servo host."""
58
59
class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""
63
64
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempt to repair the servo host failed."""
68
69
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label of
    the name; any remaining labels are kept unchanged, e.g.
    'dut1.cros' becomes 'dut1-servo.cros'.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() yields an empty separator and remainder when the name
    # contains no dot, so a bare hostname simply gains the suffix.
    first_label, dot, remainder = dut_hostname.partition('.')
    return first_label + '-servo' + dot + remainder
81
82
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Passed as `down_timeout` when rebooting via SysRq.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30
    # Name of the RPC method used as the readiness test when connecting
    # to servod through an SSH tunnel.
    SERVO_READY_METHOD = 'get_version'

    # Upper bound on power cycles attempted by _powercycle_to_repair.
    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        Initialization ends with a call to verify(). If that fails and the
        servo is required by the test, the failure is re-raised for hosts
        outside the lab and a repair is attempted for hosts inside the lab.
        If the servo is not required, the failure is only logged.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')

        if ENABLE_SSH_TUNNEL_FOR_SERVO:
            self._servod_server = self.rpc_server_tracker.xmlrpc_connect(
                    None, servo_port, ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60)
        else:
            remote = 'http://%s:%s' % (self.hostname, servo_port)
            self._servod_server = xmlrpclib.ServerProxy(remote)

        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        self.required_by_test = required_by_test
        try:
            self.verify()
        except Exception as e:
            if not required_by_test:
                # Deliberately best-effort: a servo that the test does not
                # need must not fail host creation. Record the failure
                # instead of dropping it silently.
                logging.info('Ignoring verify failure on servo host %s, '
                             'servo is not required by the test: %s',
                             self.hostname, e)
            elif not self.is_in_lab():
                raise
            else:
                self.repair()


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: The proxy object created in _initialize: an
                  xmlrpclib.ServerProxy for a direct connection, or
                  whatever rpc_server_tracker.xmlrpc_connect returned
                  when SSH tunnelling is enabled.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A one-element list holding the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of the remote code path already handle.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button', bounded by
        PWR_BUTTON_CMD_TIMEOUT_SECS.

        @raises ServoHostVerifyFailure if the request times out or fails
                with a socket, XML-RPC or HTTP status-line error.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            # The first element of the pair returned by retry.timeout is
            # treated as the "timed out" flag; the second is unused here.
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet. The check is skipped entirely when the servo host is localhost.

        @raises ServoHostVerifyFailure if the check for
                /var/lib/servod/config fails on a host that is running
                chromeos. Failures on other hosts are logged and ignored.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            # Use the class constant so the process name is defined in
            # exactly one place. int() validates that every token really
            # is a PID before it is logged.
            pgrep_output = self.run('pgrep %s' % self.SERVOD_PROCESS).stdout
            pids = [str(int(s)) for s in pgrep_output.split()]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    @_timer.decorate
    def _update_image(self):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If the host does not come back within
            120 seconds of the reboot that applies a finished update.

        """
        # servod could be running in a Ubuntu workstation.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        # Work out which build the servo host is expected to run.
        board = _CONFIG.get_config_value(
                'CROS', 'servo_board')
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        target_version = afe.run('get_stable_version', board=board)
        build_pattern = _CONFIG.get_config_value(
                'CROS', 'stable_build_pattern')
        target_build = build_pattern % (board, target_version)
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        ds = dev_server.ImageServer.resolve(self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        current_build_number = self.get_release_version()
        status = updater.check_update_status()

        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s with build %s',
                         self.hostname, current_build_number)
            kwargs = {
                'reboot_cmd': 'sleep 1 ; reboot & sleep 10; reboot -f',
                'fastsync': True,
                'label': None,
                'wait': False,
            }
            # Do not wait for reboot to complete. Otherwise, self.reboot call
            # will log reboot failure if servo does not come back. The logged
            # reboot failure will lead to test job failure. If the test does not
            # require servo, we don't want servo failure to fail the test with
            # error: `Host did not return from reboot` in status.log
            # If servo does not come back after reboot, exception needs to be
            # raised, so test requires servo should fail.
            self.reboot(**kwargs)
            if self.wait_up(timeout=120):
                # Re-read the version: the comparison against the target
                # build below must see the post-reboot build.
                current_build_number = self.get_release_version()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)

        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Staging problems are not fatal; the update is simply
                # skipped for this cycle.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    autotest_stats.Counter(
                            'servo_host.RootFSUpdateError').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        Triggers an image update if one is needed, then verifies:
            1) The servod config file check passes.
            2) The servod process is running.
            3) servod answers a basic command if a Servo object is
               already cached; otherwise a Servo object is created and
               its DUT initialization must finish within
               INITIALIZE_SERVO_TIMEOUT_SECS.

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self._update_image()
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            self._servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    self._servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key, then wait REBOOT_DELAY_SECS."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    label=None,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        Hosts for which has_power() is false are skipped with a log
        message.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise ServoHostRepairFailure(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        Power cycles the host and waits for it to come up, repeating
        until it is up or _MAX_POWER_CYCLE_ATTEMPTS waits have failed.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                # Any other failure counts against this repair method only;
                # the next method (if any) still gets its chance.
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if no Servo object is
                 currently cached (e.g. right after repair() resets it).
        """
        return self._servo
654
655
def create_servo_host(dut, servo_args, try_lab_servo=False):
    """Create a ServoHost object.

    The `servo_args` parameter is a dictionary specifying optional
    Servo client parameter overrides (i.e. a specific host or port).
    When specified, the caller requires that an exception be raised
    unless both the ServoHost and the Servo are successfully
    created.

    There are three possible cases:
    1. If the DUT is in the Cros test lab then the ServoHost object
       is only created for the host in the lab.  Alternate host or
       port settings in `servo_args` will be ignored.
    2. When not case 1., but `servo_args` is not `None`, then create
       a ServoHost object using `servo_args`.
    3. Otherwise, return `None`.

    When the `try_lab_servo` parameter is false, it indicates that a
    ServoHost should not be created for a device in the Cros test
    lab.  The setting of `servo_args` takes precedence over the
    setting of `try_lab_servo`.

    On Moblab, the servo host and port recorded in the DUT's host
    attributes (if any) replace the contents of `servo_args`; whether
    the servo counts as required is still decided by the `servo_args`
    value the caller passed in.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.

    @returns: A ServoHost object or None. See comments above.

    """
    # Must be captured before the Moblab branch below may rebind
    # servo_args from the host attributes.
    required_by_test = servo_args is not None
    if not utils.is_in_container():
        is_moblab = utils.is_moblab()
    else:
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)

    # Bound on every path so the lab branch at the bottom can never hit an
    # unbound local; it is only meaningful when is_in_lab is True.
    lab_servo_hostname = None
    is_in_lab = False
    if not is_moblab:
        # A lab servo name can only be derived from a DUT host name, not
        # from a bare IP address.
        if not dnsname_mangler.is_ip_address(dut):
            lab_servo_hostname = make_servo_hostname(dut)
            is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)
    else:
        # Servos on Moblab are not in the actual lab.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            servo_args = {}
            servo_args[SERVO_HOST_ATTR] = hosts[0].attributes[SERVO_HOST_ATTR]
            servo_args[SERVO_PORT_ATTR] = hosts[0].attributes.get(
                    SERVO_PORT_ATTR, 9999)
            # Inside a container, a loopback servo host is replaced by the
            # address configured as SSP/host_container_ip.
            if (utils.is_in_container() and
                servo_args[SERVO_HOST_ATTR] in ['localhost', '127.0.0.1']):
                servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
                        'SSP', 'host_container_ip', type=str, default=None)

    if not is_in_lab:
        if not required_by_test:
            return None
        return ServoHost(required_by_test=True, is_in_lab=False, **servo_args)
    elif servo_args is not None or try_lab_servo:
        # Technically, this duplicates the SSH ping done early in the servo
        # proxy initialization code.  However, this ping ends in a couple
        # seconds when it fails, rather than the 60 seconds it takes to decide
        # that an SSH ping has timed out.  Specifically, that timeout happens
        # when our servo DNS name resolves, but there is no host at that IP.
        # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
        #             fixed. Autotest should not try to verify servo if servo is
        #             not required for the test.
        ping_config = ping_runner.PingConfig(
                lab_servo_hostname, count=3,
                ignore_result=True, ignore_status=True)
        logging.info('Pinging servo at %s', lab_servo_hostname)
        host_is_up = ping_runner.PingRunner().ping(ping_config).received > 0
        if host_is_up:
            return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                             required_by_test=required_by_test)
    else:
        return None