# blob: 42b95593210056b62dd1bf959a6f11dce4dde117
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""


import httplib
import logging
import socket
import time
import xmlrpclib

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib.cros import autoupdater
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.cros import constants as client_constants
from autotest_lib.server import site_utils as server_site_utils
from autotest_lib.server.cros import dnsname_mangler
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.hosts import ssh_host
from autotest_lib.site_utils.rpm_control_system import rpm_client


# Keys of the host attributes stored in the database that hold the
# servo_host and servo_port values for the servo attached to a DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Shared handle on the global configuration, plus the CROS switch that
# selects whether servod is reached through an ssh tunnel (default: off).
_CONFIG = global_config.global_config
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

class ServoHostException(error.AutoservError):
    """Root of the exception hierarchy raised by ServoHost."""


class ServoHostVerifyFailure(ServoHostException):
    """Signals that verification of a servo host failed."""


class ServoHostRepairFailure(ServoHostException):
    """Signals that a repair method could not repair a servo host."""


class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""


class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempt to repair a servo host failed."""


def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label of
    the name; any remaining labels are left untouched.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    first_label, dot, remainder = dut_hostname.partition('.')
    return first_label + '-servo' + dot + remainder


class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30
    # Name of the RPC used as readiness probe when connecting via ssh tunnel.
    SERVO_READY_METHOD = 'get_version'

    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.
        After the attributes are set up, a proxy to servod is created and
        verify() is run.  If that fails and the servo is required by the
        test, the error is re-raised for non-lab hosts and repair() is
        attempted for lab hosts.  If the servo is not required, the
        failure is logged and otherwise ignored.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')

        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        self.required_by_test = required_by_test
        try:
            if ENABLE_SSH_TUNNEL_FOR_SERVO:
                self._servod_server = self.rpc_server_tracker.xmlrpc_connect(
                        None, servo_port,
                        ready_test_name=self.SERVO_READY_METHOD,
                        timeout_seconds=60)
            else:
                remote = 'http://%s:%s' % (self.hostname, servo_port)
                self._servod_server = xmlrpclib.ServerProxy(remote)
            self.verify()
        except Exception as e:
            if required_by_test:
                if not self.is_in_lab():
                    raise
                # Keep a trace of the original failure; repair() will
                # raise its own error if it cannot fix the host.
                logging.warning('Servo host %s failed verification, '
                                'attempting repair: %s', self.hostname, e)
                self.repair()
            else:
                # Best effort only: the test does not need servo, so the
                # failure must not propagate - but do not lose it either.
                logging.warning('Ignoring servo host %s setup failure, '
                                'servo is not required by the test: %s',
                                self.hostname, e)


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: The proxy object created in _initialize: an
                  xmlrpclib.ServerProxy, or the object returned by
                  rpc_server_tracker.xmlrpc_connect when the ssh tunnel
                  is enabled.

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list holding the servod process name.

        """
        processes = [self.SERVOD_PROCESS]
        return processes


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
                 False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local error into the type that callers of
                # the remote code path already handle.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button', bounded by
        PWR_BUTTON_CMD_TIMEOUT_SECS.

        @raises ServoHostVerifyFailure if the request times out or fails
                with a socket/xmlrpc/http error.

        """
        msg_prefix = 'Servod error: %s'
        error_msg = None
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
            if timeout:
                error_msg = msg_prefix % 'Request timed out.'
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            error_msg = msg_prefix % e
        if error_msg:
            raise ServoHostVerifyFailure(error_msg)


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet.  The check is skipped for localhost, and a failure is ignored
        when the host cannot be confirmed to run chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            pids = [str(int(s)) for s in
                    self.run('pgrep servod').stdout.strip().split('\n')]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    def _check_for_reboot(self, updater):
        """
        Reboot this servo host if an upgrade is waiting.

        If the host has successfully downloaded and finalized a new
        build, reboot.

        @param updater: a ChromiumOSUpdater instance for checking
            whether reboot is needed.
        @return Return a (status, build) tuple reflecting the
            update_engine status and current build of the host
            at the end of the call.
        @raises error.AutoservHostError if the host fails to shut down
            or fails to come back from the reboot.
        """
        current_build_number = self.get_release_version()
        status = updater.check_update_status()
        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s from build %s',
                         self.hostname, current_build_number)
            # Capture the boot id while the host is still known to be up.
            # Reading it after the reboot has been kicked off would race
            # with the shutdown and make the wait_down() check unreliable.
            old_boot_id = self.get_boot_id()
            # Tell the reboot() call not to wait for completion.
            # Otherwise, the call will log reboot failure if servo does
            # not come back.  The logged reboot failure will lead to
            # test job failure.  If the test does not require servo, we
            # don't want servo failure to fail the test with error:
            # `Host did not return from reboot` in status.log.
            reboot_cmd = 'sleep 1 ; reboot & sleep 10; reboot -f'
            self.reboot(reboot_cmd=reboot_cmd, fastsync=True,
                        label=None, wait=False)

            # We told the reboot() call not to wait, but we need to wait
            # for the reboot before we continue.  Alas.  The code from
            # here below is basically a copy of Host.wait_for_restart(),
            # with the logging bits ripped out, so that they can't cause
            # the failure logging problem described above.
            #
            # The black stain that this has left on my soul can never be
            # erased.
            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                                  old_boot_id=old_boot_id):
                raise error.AutoservHostError(
                        'servo host %s failed to shut down.' %
                        self.hostname)
            if self.wait_up(timeout=120):
                current_build_number = self.get_release_version()
                status = updater.check_update_status()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)
        return status, current_build_number


    @_timer.decorate
    def update_image(self, wait_for_update=False):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do not trigger another one.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update.

        In the last two cases the call only waits for the update to
        finish when `wait_for_update` is true.

        @param wait_for_update If an update needs to be applied and
            this is true, then don't return until the update is
            downloaded and finalized, and the host rebooted.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises error.AutoservHostError: If `wait_for_update` is true and
            the host does not end up idle on the target build.

        """
        # servod could be running in a Ubuntu workstation.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        # Work out which build servo hosts are expected to run.
        board = _CONFIG.get_config_value('CROS', 'servo_board')
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        target_version = afe.run('get_stable_version', board=board)
        build_pattern = _CONFIG.get_config_value(
                'CROS', 'stable_build_pattern')
        target_build = build_pattern % (board, target_version)
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        ds = dev_server.ImageServer.resolve(self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        status, current_build_number = self._check_for_reboot(updater)
        update_pending = True
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    autotest_stats.Counter(
                            'servo_host.RootFSUpdateError').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            update_pending = False

        if update_pending and wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)
            status, current_build_number = self._check_for_reboot(updater)
            if (status != autoupdater.UPDATER_IDLE or
                    current_build_number != target_build_number):
                logging.error('Update failed; status: %s, '
                              'actual build: %s',
                              status, current_build_number)
                message = ('Servo host failed to update from %s to %s' %
                           (current_build_number, target_build_number))
                raise error.AutoservHostError(message)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        An update is triggered if needed (without waiting for it), then
        the following are verified:
        1) The servod config file check passes.
        2) The servod process check passes.
        3) Either a basic servod call succeeds (when a Servo object is
           already cached) or a new Servo object can be created and
           initialized within INITIALIZE_SERVO_TIMEOUT_SECS.

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        logging.info('Applying an update to the servo host, if necessary.')
        self.update_image(wait_for_update=False)
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            self._servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    self._servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key, then give servod time to start."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    label=None,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger &',
                    fastsync=True)
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise ServoHostRepairFailure(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        # TODO(dshi): add self._powercycle_to_repair back to repair_funcs
        # after crbug.com/336606 is fixed.
        repair_funcs = [self._repair_with_sysrq_reboot,]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if none has been created.
        """
        return self._servo


def create_servo_host(dut, servo_args, try_lab_servo=False):
    """Build the ServoHost for a DUT, or decide that there is none.

    `servo_args` is a dictionary of optional Servo client parameter
    overrides (i.e. a specific host or port).  Passing a dictionary
    (rather than `None`) marks the servo as required by the test.

    The outcome is one of:
      1. The DUT's servo is in the Cros test lab: a ServoHost is created
         for the lab servo only, and alternate host or port settings in
         `servo_args` are ignored.  This is attempted only when
         `servo_args` is not `None` or `try_lab_servo` is true, and only
         if the lab servo answers a ping; otherwise `None` is returned.
      2. Not case 1, and `servo_args` was not `None`: a ServoHost is
         created from `servo_args`.  On moblab the arguments are taken
         from the DUT's host attributes in the AFE when those are set.
      3. Otherwise, `None` is returned.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.

    @returns: A ServoHost object or None. See comments above.

    """
    # Decided up front: only the caller's own servo_args count here, not
    # the ones that may be looked up from the AFE further down.
    required_by_test = servo_args is not None

    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
    else:
        is_moblab = utils.is_moblab()

    # Servos on Moblab, and servos of DUTs given by IP address, are never
    # treated as lab devices.
    is_in_lab = False
    if is_moblab:
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            host_attributes = hosts[0].attributes
            servo_args = {
                SERVO_HOST_ATTR: host_attributes[SERVO_HOST_ATTR],
                SERVO_PORT_ATTR: host_attributes.get(SERVO_PORT_ATTR, 9999),
            }
            # Inside a container a loopback servo host is swapped for the
            # configured 'host_container_ip' value.
            if (utils.is_in_container() and
                    servo_args[SERVO_HOST_ATTR] in ['localhost', '127.0.0.1']):
                servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
                        'SSP', 'host_container_ip', type=str, default=None)
    elif not dnsname_mangler.is_ip_address(dut):
        lab_servo_hostname = make_servo_hostname(dut)
        is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)

    if not is_in_lab:
        if required_by_test:
            return ServoHost(required_by_test=True, is_in_lab=False,
                             **servo_args)
        return None

    if servo_args is None and not try_lab_servo:
        return None

    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    # TODO(dshi): crbug.com/380773 Remove this ping check once the bug is
    #             fixed. Autotest should not try to verify servo if servo is
    #             not required for the test.
    ping_config = ping_runner.PingConfig(
            lab_servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    logging.info('Pinging servo at %s', lab_servo_hostname)
    ping_result = ping_runner.PingRunner().ping(ping_config)
    if not ping_result.received > 0:
        return None
    return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                     required_by_test=required_by_test)