blob: 0f36a5ce8792e913ffb40e390d4f85aedfbeac0b [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Keys of the host attributes stored in the database that hold, for the
# servo attached to a DUT, the servo host name and the servod port.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Handle on the global autotest configuration object.
_CONFIG = global_config.global_config
# Read from the [CROS] config section; defaults to False. When true,
# ServoHost._initialize connects to servod through
# rpc_server_tracker.xmlrpc_connect instead of a direct
# xmlrpclib.ServerProxy.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS',
        'enable_ssh_tunnel_for_servo',
        type=bool,
        default=False)
Simran Basi0739d682015-02-25 16:22:56 -080044
class ServoHostException(error.AutoservError):
    """Root of the exception hierarchy raised by ServoHost."""
48
49
class ServoHostVerifyFailure(ServoHostException):
    """Signals that a servo host did not pass verification."""
53
54
class ServoHostRepairFailure(ServoHostException):
    """Signals that one repair method could not fix the servo host."""
58
59
class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""
63
64
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every attempted repair of a servo host failed."""
68
69
def make_servo_hostname(dut_hostname):
    """Derive a servo hostname from the hostname of its DUT.

    The suffix '-servo' is appended to the first dot-separated label of
    the name; any remaining labels are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() keeps the separator, so a name without any '.' simply
    # yields an empty separator and an empty remainder.
    first_label, separator, remainder = dut_hostname.partition('.')
    return first_label + '-servo' + separator + remainder
81
82
def servo_host_is_up(servo_hostname):
    """
    Ping a servo host and report whether it answered.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False otherwise.
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname, count=3,
                                    ignore_result=True, ignore_status=True)
    runner = ping_runner.PingRunner()
    ping_result = runner.ping(config)
    return ping_result.received > 0
101
102
Fang Deng5d518f42013-08-02 14:04:32 -0700103class ServoHost(ssh_host.SSHHost):
104 """Host class for a host that controls a servo, e.g. beaglebone."""
105
106 # Timeout for getting the value of 'pwr_button'.
107 PWR_BUTTON_CMD_TIMEOUT_SECS = 15
108 # Timeout for rebooting servo host.
109 REBOOT_TIMEOUT_SECS = 90
110 HOST_DOWN_TIMEOUT_SECS = 60
111 # Delay after rebooting for servod to become fully functional.
112 REBOOT_DELAY_SECS = 20
113 # Servod process name.
114 SERVOD_PROCESS = 'servod'
Dan Shie5b3c512014-08-21 12:12:09 -0700115 # Timeout for initializing servo signals.
116 INITIALIZE_SERVO_TIMEOUT_SECS = 30
xixuan6cf6d2f2016-01-29 15:29:00 -0800117 # Ready test function
118 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -0700119
Fang Dengd4fe7392013-09-20 12:18:21 -0700120 _MAX_POWER_CYCLE_ATTEMPTS = 3
Gabe Black1e1c41b2015-02-04 23:55:15 -0800121 _timer = autotest_stats.Timer('servo_host')
Fang Dengd4fe7392013-09-20 12:18:21 -0700122
Fang Deng5d518f42013-08-02 14:04:32 -0700123
124 def _initialize(self, servo_host='localhost', servo_port=9999,
Dan Shi4d478522014-02-14 13:46:32 -0800125 required_by_test=True, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700126 """Initialize a ServoHost instance.
127
128 A ServoHost instance represents a host that controls a servo.
129
130 @param servo_host: Name of the host where the servod process
131 is running.
132 @param servo_port: Port the servod process is listening on.
Dan Shi4d478522014-02-14 13:46:32 -0800133 @param required_by_test: True if servo is required by test.
134 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
135 to None, for which utils.host_is_in_lab_zone will be
136 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700137
138 """
139 super(ServoHost, self)._initialize(hostname=servo_host,
140 *args, **dargs)
Dan Shi4d478522014-02-14 13:46:32 -0800141 if is_in_lab is None:
142 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
143 else:
144 self._is_in_lab = is_in_lab
Fang Deng5d518f42013-08-02 14:04:32 -0700145 self._is_localhost = (self.hostname == 'localhost')
xixuan6cf6d2f2016-01-29 15:29:00 -0800146
Fang Deng5d518f42013-08-02 14:04:32 -0700147 # Commands on the servo host must be run by the superuser. Our account
148 # on Beaglebone is root, but locally we might be running as a
149 # different user. If so - `sudo ' will have to be added to the
150 # commands.
151 if self._is_localhost:
152 self._sudo_required = utils.system_output('id -u') != '0'
153 else:
154 self._sudo_required = False
Dan Shi4d478522014-02-14 13:46:32 -0800155 # Create a cache of Servo object. This must be called at the end of
156 # _initialize to make sure all attributes are set.
157 self._servo = None
Dan Shi90466352015-09-22 15:01:05 -0700158 self.required_by_test = required_by_test
Dan Shi4d478522014-02-14 13:46:32 -0800159 try:
xixuan2b80c182016-03-28 11:59:30 -0700160 if ENABLE_SSH_TUNNEL_FOR_SERVO:
161 self._servod_server = self.rpc_server_tracker.xmlrpc_connect(
162 None, servo_port, ready_test_name=self.SERVO_READY_METHOD,
163 timeout_seconds=60)
164 else:
165 remote = 'http://%s:%s' % (self.hostname, servo_port)
166 self._servod_server = xmlrpclib.ServerProxy(remote)
Dan Shi4d478522014-02-14 13:46:32 -0800167 self.verify()
Alex Millercc589692014-04-21 18:00:22 -0700168 except Exception:
Dan Shibbb0cb62014-03-24 17:50:57 -0700169 if required_by_test:
170 if not self.is_in_lab():
171 raise
172 else:
J. Richard Barnettec2d99cf2015-11-18 12:46:15 -0800173 self.repair()
Fang Deng5d518f42013-08-02 14:04:32 -0700174
175
176 def is_in_lab(self):
177 """Check whether the servo host is a lab device.
178
179 @returns: True if the servo host is in Cros Lab, otherwise False.
180
181 """
182 return self._is_in_lab
183
184
185 def is_localhost(self):
186 """Checks whether the servo host points to localhost.
187
188 @returns: True if it points to localhost, otherwise False.
189
190 """
191 return self._is_localhost
192
193
194 def get_servod_server_proxy(self):
195 """Return a proxy that can be used to communicate with servod server.
196
197 @returns: An xmlrpclib.ServerProxy that is connected to the servod
198 server on the host.
199
200 """
201 return self._servod_server
202
203
204 def get_wait_up_processes(self):
205 """Get the list of local processes to wait for in wait_up.
206
207 Override get_wait_up_processes in
208 autotest_lib.client.common_lib.hosts.base_classes.Host.
209 Wait for servod process to go up. Called by base class when
210 rebooting the device.
211
212 """
213 processes = [self.SERVOD_PROCESS]
214 return processes
215
216
beeps5e8c45a2013-12-17 22:05:11 -0800217 def _is_cros_host(self):
218 """Check if a servo host is running chromeos.
219
220 @return: True if the servo host is running chromeos.
221 False if it isn't, or we don't have enough information.
222 """
223 try:
224 result = self.run('grep -q CHROMEOS /etc/lsb-release',
225 ignore_status=True, timeout=10)
226 except (error.AutoservRunError, error.AutoservSSHTimeout):
227 return False
228 return result.exit_status == 0
229
230
Fang Deng5d518f42013-08-02 14:04:32 -0700231 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
232 connect_timeout=None, alive_interval=None):
233 """Override default make_ssh_command to use tuned options.
234
235 Tuning changes:
236 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
237 connection failure. Consistency with remote_access.py.
238
239 - ServerAliveInterval=180; which causes SSH to ping connection every
240 180 seconds. In conjunction with ServerAliveCountMax ensures
241 that if the connection dies, Autotest will bail out quickly.
242
243 - ServerAliveCountMax=3; consistency with remote_access.py.
244
245 - ConnectAttempts=4; reduce flakiness in connection errors;
246 consistency with remote_access.py.
247
248 - UserKnownHostsFile=/dev/null; we don't care about the keys.
249
250 - SSH protocol forced to 2; needed for ServerAliveInterval.
251
252 @param user User name to use for the ssh connection.
253 @param port Port on the target host to use for ssh connection.
254 @param opts Additional options to the ssh command.
255 @param hosts_file Ignored.
256 @param connect_timeout Ignored.
257 @param alive_interval Ignored.
258
259 @returns: An ssh command with the requested settings.
260
261 """
262 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
263 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
264 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
265 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
266 ' -o Protocol=2 -l %s -p %d')
267 return base_command % (opts, user, port)
268
269
270 def _make_scp_cmd(self, sources, dest):
271 """Format scp command.
272
273 Given a list of source paths and a destination path, produces the
274 appropriate scp command for encoding it. Remote paths must be
275 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
276 to allow additional ssh options.
277
278 @param sources: A list of source paths to copy from.
279 @param dest: Destination path to copy to.
280
281 @returns: An scp command that copies |sources| on local machine to
282 |dest| on the remote servo host.
283
284 """
285 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
286 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
287 return command % (self.master_ssh_option,
288 self.port, ' '.join(sources), dest)
289
290
291 def run(self, command, timeout=3600, ignore_status=False,
292 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
293 connect_timeout=30, options='', stdin=None, verbose=True, args=()):
294 """Run a command on the servo host.
295
296 Extends method `run` in SSHHost. If the servo host is a remote device,
297 it will call `run` in SSHost without changing anything.
298 If the servo host is 'localhost', it will call utils.system_output.
299
300 @param command: The command line string.
301 @param timeout: Time limit in seconds before attempting to
302 kill the running process. The run() function
303 will take a few seconds longer than 'timeout'
304 to complete if it has to kill the process.
305 @param ignore_status: Do not raise an exception, no matter
306 what the exit code of the command is.
307 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
308 @param connect_timeout: SSH connection timeout (in seconds)
309 Ignored if host is 'localhost'.
310 @param options: String with additional ssh command options
311 Ignored if host is 'localhost'.
312 @param stdin: Stdin to pass (a string) to the executed command.
313 @param verbose: Log the commands.
314 @param args: Sequence of strings to pass as arguments to command by
315 quoting them in " and escaping their contents if necessary.
316
317 @returns: A utils.CmdResult object.
318
319 @raises AutoservRunError if the command failed.
320 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
321 when servo host is not 'localhost'.
322
323 """
324 run_args = {'command': command, 'timeout': timeout,
325 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
326 'stderr_tee': stderr_tee, 'stdin': stdin,
327 'verbose': verbose, 'args': args}
328 if self.is_localhost():
329 if self._sudo_required:
330 run_args['command'] = 'sudo -n %s' % command
331 try:
332 return utils.run(**run_args)
333 except error.CmdError as e:
334 logging.error(e)
335 raise error.AutoservRunError('command execution error',
336 e.result_obj)
337 else:
338 run_args['connect_timeout'] = connect_timeout
339 run_args['options'] = options
340 return super(ServoHost, self).run(**run_args)
341
342
Dan Shi33412a82014-06-10 15:12:27 -0700343 @_timer.decorate
Fang Deng5d518f42013-08-02 14:04:32 -0700344 def _check_servod(self):
345 """A sanity check of the servod state."""
346 msg_prefix = 'Servod error: %s'
347 error_msg = None
348 try:
349 timeout, _ = retry.timeout(
350 self._servod_server.get, args=('pwr_button', ),
351 timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
352 if timeout:
353 error_msg = msg_prefix % 'Request timed out.'
354 except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
355 error_msg = msg_prefix % e
356 if error_msg:
357 raise ServoHostVerifyFailure(error_msg)
358
359
Dan Shi33412a82014-06-10 15:12:27 -0700360 def _check_servo_config(self):
361 """Check if config file exists for servod.
362
363 If servod config file does not exist, there is no need to verify if
364 servo is working. The servo could be attached to a board not supported
365 yet.
366
367 @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.
368
369 """
Simran Basi0739d682015-02-25 16:22:56 -0800370 if self._is_localhost:
371 return
Dan Shi33412a82014-06-10 15:12:27 -0700372 try:
373 self.run('test -f /var/lib/servod/config')
374 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
Ricky Liang86b80182014-06-13 14:39:42 +0800375 if not self._is_cros_host():
376 logging.info('Ignoring servo config check failure, either %s '
377 'is not running chromeos or we cannot find enough '
378 'information about the host.', self.hostname)
379 return
Dan Shi33412a82014-06-10 15:12:27 -0700380 raise ServoHostVerifyFailure(
381 'Servo config file check failed for %s: %s' %
382 (self.hostname, e))
383
384
Dan Shie5b3c512014-08-21 12:12:09 -0700385 def _check_servod_status(self):
386 """Check if servod process is running.
387
388 If servod is not running, there is no need to verify if servo is
389 working. Check the process before making any servod call can avoid
390 long timeout that eventually fail any servod call.
391 If the servo host is set to localhost, failure of servod status check
392 will be ignored, as servo call may use ssh tunnel.
393
394 @raises ServoHostVerifyFailure if servod process does not exist.
395
396 """
397 try:
Dan Shi18040e42014-09-03 11:14:00 -0700398 pids = [str(int(s)) for s in
399 self.run('pgrep servod').stdout.strip().split('\n')]
400 logging.info('servod is running, PID=%s', ','.join(pids))
Dan Shie5b3c512014-08-21 12:12:09 -0700401 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
402 if self._is_localhost:
403 logging.info('Ignoring servod status check failure. servo host '
404 'is set to localhost, servo call may use ssh '
405 'tunnel to go through.')
406 else:
407 raise ServoHostVerifyFailure(
408 'Servod status check failed for %s: %s' %
409 (self.hostname, e))
410
411
Dan Shi0942b1d2015-03-31 11:07:00 -0700412 def get_release_version(self):
413 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
414
415 @returns The version string in lsb-release, under attribute
416 CHROMEOS_RELEASE_VERSION.
417 """
418 lsb_release_content = self.run(
419 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
420 return lsbrelease_utils.get_chromeos_release_version(
421 lsb_release_content=lsb_release_content)
422
423
Richard Barnette3a7697f2016-04-20 11:33:27 -0700424 def _check_for_reboot(self, updater):
425 """
426 Reboot this servo host if an upgrade is waiting.
427
428 If the host has successfully downloaded and finalized a new
429 build, reboot.
430
431 @param updater: a ChromiumOSUpdater instance for checking
432 whether reboot is needed.
433 @return Return a (status, build) tuple reflecting the
434 update_engine status and current build of the host
435 at the end of the call.
436 """
437 current_build_number = self.get_release_version()
438 status = updater.check_update_status()
439 if status == autoupdater.UPDATER_NEED_REBOOT:
440 logging.info('Rebooting beaglebone host %s from build %s',
441 self.hostname, current_build_number)
442 # Tell the reboot() call not to wait for completion.
443 # Otherwise, the call will log reboot failure if servo does
444 # not come back. The logged reboot failure will lead to
445 # test job failure. If the test does not require servo, we
446 # don't want servo failure to fail the test with error:
447 # `Host did not return from reboot` in status.log.
448 reboot_cmd = 'sleep 1 ; reboot & sleep 10; reboot -f',
Richard Barnetteab9769f2016-06-01 15:01:44 -0700449 self.reboot(reboot_cmd=reboot_cmd, fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700450
451 # We told the reboot() call not to wait, but we need to wait
452 # for the reboot before we continue. Alas. The code from
453 # here below is basically a copy of Host.wait_for_restart(),
454 # with the logging bits ripped out, so that they can't cause
455 # the failure logging problem described above.
456 #
457 # The black stain that this has left on my soul can never be
458 # erased.
459 old_boot_id = self.get_boot_id()
460 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
461 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
462 old_boot_id=old_boot_id):
463 raise error.AutoservHostError(
464 'servo host %s failed to shut down.' %
465 self.hostname)
466 if self.wait_up(timeout=120):
467 current_build_number = self.get_release_version()
468 status = updater.check_update_status()
469 logging.info('servo host %s back from reboot, with build %s',
470 self.hostname, current_build_number)
471 else:
472 raise error.AutoservHostError(
473 'servo host %s failed to come back from reboot.' %
474 self.hostname)
475 return status, current_build_number
476
477
beeps5e8c45a2013-12-17 22:05:11 -0800478 @_timer.decorate
Richard Barnette3a7697f2016-04-20 11:33:27 -0700479 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800480 """Update the image on the servo host, if needed.
481
J. Richard Barnette84895392015-04-30 12:31:01 -0700482 This method recognizes the following cases:
483 * If the Host is not running Chrome OS, do nothing.
484 * If a previously triggered update is now complete, reboot
485 to the new version.
486 * If the host is processing a previously triggered update,
487 do nothing.
488 * If the host is running a version of Chrome OS different
489 from the default for servo Hosts, trigger an update, but
490 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800491
Richard Barnette3a7697f2016-04-20 11:33:27 -0700492 @param wait_for_update If an update needs to be applied and
493 this is true, then don't return until the update is
494 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800495 @raises dev_server.DevServerException: If all the devservers are down.
496 @raises site_utils.ParseBuildNameException: If the devserver returns
497 an invalid build name.
498 @raises autoupdater.ChromiumOSError: If something goes wrong in the
499 checking update engine client status or applying an update.
500 @raises AutoservRunError: If the update_engine_client isn't present on
501 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700502
beeps5e8c45a2013-12-17 22:05:11 -0800503 """
Dan Shib795b5a2015-09-24 13:26:35 -0700504 # servod could be running in a Ubuntu workstation.
beeps5e8c45a2013-12-17 22:05:11 -0800505 if not self._is_cros_host():
506 logging.info('Not attempting an update, either %s is not running '
507 'chromeos or we cannot find enough information about '
508 'the host.', self.hostname)
509 return
510
Dan Shib795b5a2015-09-24 13:26:35 -0700511 if lsbrelease_utils.is_moblab():
512 logging.info('Not attempting an update, %s is running moblab.',
513 self.hostname)
514 return
515
Richard Barnette3a7697f2016-04-20 11:33:27 -0700516 board = _CONFIG.get_config_value('CROS', 'servo_board')
J. Richard Barnette84895392015-04-30 12:31:01 -0700517 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
518 target_version = afe.run('get_stable_version', board=board)
Dan Shi3b2adf62015-09-02 17:46:54 -0700519 build_pattern = _CONFIG.get_config_value(
J. Richard Barnette84895392015-04-30 12:31:01 -0700520 'CROS', 'stable_build_pattern')
521 target_build = build_pattern % (board, target_version)
522 target_build_number = server_site_utils.ParseBuildName(
523 target_build)[3]
beeps5e8c45a2013-12-17 22:05:11 -0800524 ds = dev_server.ImageServer.resolve(self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700525 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800526
527 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700528 status, current_build_number = self._check_for_reboot(updater)
529 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800530 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
531 logging.info('servo host %s already processing an update, update '
532 'engine client status=%s', self.hostname, status)
J. Richard Barnette84895392015-04-30 12:31:01 -0700533 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800534 logging.info('Using devserver url: %s to trigger update on '
535 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700536 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800537 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700538 ds.stage_artifacts(target_build,
539 artifacts=['full_payload'])
540 except Exception as e:
541 logging.error('Staging artifacts failed: %s', str(e))
542 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800543 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700544 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700545 # TODO(jrbarnette): This 'touch' is a gross hack
546 # to get us past crbug.com/613603. Once that
547 # bug is resolved, we should remove this code.
548 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700549 updater.trigger_update()
550 except autoupdater.RootFSUpdateError as e:
551 trigger_download_status = 'failed with %s' % str(e)
552 autotest_stats.Counter(
553 'servo_host.RootFSUpdateError').increment()
554 else:
555 trigger_download_status = 'passed'
556 logging.info('Triggered download and update %s for %s, '
557 'update engine currently in status %s',
558 trigger_download_status, self.hostname,
559 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800560 else:
561 logging.info('servo host %s does not require an update.',
562 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700563 update_pending = False
564
565 if update_pending and wait_for_update:
566 logging.info('Waiting for servo update to complete.')
567 self.run('update_engine_client --follow', ignore_status=True)
568 status, current_build_number = self._check_for_reboot(updater)
569 if (status != autoupdater.UPDATER_IDLE or
570 current_build_number != target_build_number):
571 logging.error('Update failed; status: %s, '
572 'actual build: %s',
573 status, current_build_number)
574 message = ('Servo host failed to update from %s to %s' %
575 (current_build_number, target_build_number))
576 raise error.AutoservHostError(message)
beeps5e8c45a2013-12-17 22:05:11 -0800577
578
Fang Deng5d518f42013-08-02 14:04:32 -0700579 def verify_software(self):
beeps5e8c45a2013-12-17 22:05:11 -0800580 """Update the servo host and verify it's in a good state.
Fang Deng5d518f42013-08-02 14:04:32 -0700581
582 It overrides the base class function for verify_software.
beeps5e8c45a2013-12-17 22:05:11 -0800583 If an update is available, downloads and applies it. Then verifies:
Fang Deng5d518f42013-08-02 14:04:32 -0700584 1) Whether basic servo command can run successfully.
585 2) Whether USB is in a good state. crbug.com/225932
586
587 @raises ServoHostVerifyFailure if servo host does not pass the checks.
588
589 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700590 # TODO(jrbarnette) Old versions of beaglebone_servo include
591 # the powerd package. In some (not yet understood)
592 # circumstances, powerd on beaglebone will shut down after
593 # attempting to suspend. Current versions of
594 # beaglebone_servo don't have powerd, but until we can purge
595 # the lab of the old images, we need to make sure powerd
596 # isn't running.
597 self.run('stop powerd', ignore_status=True)
598
beeps5e8c45a2013-12-17 22:05:11 -0800599 logging.info('Applying an update to the servo host, if necessary.')
Richard Barnette3a7697f2016-04-20 11:33:27 -0700600 self.update_image(wait_for_update=False)
Dan Shi33412a82014-06-10 15:12:27 -0700601 self._check_servo_config()
Dan Shie5b3c512014-08-21 12:12:09 -0700602 self._check_servod_status()
603
Dan Shi4d478522014-02-14 13:46:32 -0800604 # If servo is already initialized, we don't need to do it again, call
605 # _check_servod should be enough.
606 if self._servo:
607 self._check_servod()
608 else:
609 self._servo = servo.Servo(servo_host=self)
Dan Shie5b3c512014-08-21 12:12:09 -0700610 timeout, _ = retry.timeout(
611 self._servo.initialize_dut,
612 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
613 if timeout:
614 raise ServoHostVerifyFailure('Servo initialize timed out.')
Fang Deng5d518f42013-08-02 14:04:32 -0700615 logging.info('Sanity checks pass on servo host %s', self.hostname)
616
617
618 def _repair_with_sysrq_reboot(self):
619 """Reboot with magic SysRq key."""
620 self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
621 down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
Richard Barnetteab9769f2016-06-01 15:01:44 -0700622 reboot_cmd='echo "b" > /proc/sysrq-trigger',
Fang Deng5d518f42013-08-02 14:04:32 -0700623 fastsync=True)
624 time.sleep(self.REBOOT_DELAY_SECS)
625
626
Fang Dengd4fe7392013-09-20 12:18:21 -0700627 def has_power(self):
628 """Return whether or not the servo host is powered by PoE."""
629 # TODO(fdeng): See crbug.com/302791
630 # For now, assume all servo hosts in the lab have power.
631 return self.is_in_lab()
632
633
634 def power_cycle(self):
635 """Cycle power to this host via PoE if it is a lab device.
636
637 @raises ServoHostRepairFailure if it fails to power cycle the
638 servo host.
639
640 """
641 if self.has_power():
642 try:
643 rpm_client.set_power(self.hostname, 'CYCLE')
644 except (socket.error, xmlrpclib.Error,
645 httplib.BadStatusLine,
646 rpm_client.RemotePowerException) as e:
647 raise ServoHostRepairFailure(
648 'Power cycling %s failed: %s' % (self.hostname, e))
649 else:
650 logging.info('Skipping power cycling, not a lab device.')
651
652
Fang Deng5d518f42013-08-02 14:04:32 -0700653 def _powercycle_to_repair(self):
Fang Dengd4fe7392013-09-20 12:18:21 -0700654 """Power cycle the servo host using PoE.
655
656 @raises ServoHostRepairFailure if it fails to fix the servo host.
Fang Dengf0ea6142013-10-10 21:43:16 -0700657 @raises ServoHostRepairMethodNA if it does not support power.
Fang Dengd4fe7392013-09-20 12:18:21 -0700658
659 """
660 if not self.has_power():
Fang Dengf0ea6142013-10-10 21:43:16 -0700661 raise ServoHostRepairMethodNA('%s does not support power.' %
662 self.hostname)
Fang Dengd4fe7392013-09-20 12:18:21 -0700663 logging.info('Attempting repair via PoE powercycle.')
664 failed_cycles = 0
665 self.power_cycle()
666 while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
667 failed_cycles += 1
668 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
669 raise ServoHostRepairFailure(
670 'Powercycled host %s %d times; device did not come back'
671 ' online.' % (self.hostname, failed_cycles))
672 self.power_cycle()
673 logging.info('Powercycling was successful after %d failures.',
674 failed_cycles)
675 # Allow some time for servod to get started.
676 time.sleep(self.REBOOT_DELAY_SECS)
Fang Deng5d518f42013-08-02 14:04:32 -0700677
678
def repair(self):
    """Try to bring the servo host back to a working state.

    This overrides the base class function for repair.
    Hosts that are not in the Cros Lab are left untouched: a warning
    is logged and the method returns immediately.

    For a lab host the cached servo object is dropped first, then the
    repair methods are tried in order (`_repair_with_sysrq_reboot`,
    then `_powercycle_to_repair`).  An attempt only counts as a
    success if `verify()` passes after it; the first success ends the
    repair.  The outcome of every attempt is recorded in an
    autotest_stats counter named after the repair method.

    @raises ServoHostRepairTotalFailure if all attempts fail.  The
            message contains the error text of every attempt.

    """
    if not self.is_in_lab():
        logging.warning('Skip repairing servo host %s: Not a lab device.',
                        self.hostname)
        return

    logging.info('Attempting to repair servo host %s.', self.hostname)
    # Reset the cache to guarantee servo initialization being called later.
    self._servo = None

    strategies = (self._repair_with_sysrq_reboot,
                  self._powercycle_to_repair)
    failure_messages = []
    for strategy in strategies:
        stat_name = 'servo_host_repair.%s.' % strategy.__name__
        try:
            strategy()
            self.verify()
            # Kept inside the try block so that a failure while
            # recording the success is handled like any other failure.
            autotest_stats.Counter(stat_name + 'SUCCEEDED').increment()
            return
        except ServoHostRepairMethodNA as not_applicable:
            # This repair method cannot be used for this host.
            logging.warning('Repair method NA: %s', not_applicable)
            autotest_stats.Counter(stat_name + 'RepairNA').increment()
            failure_messages.append(str(not_applicable))
        except Exception as failure:
            # Either the repair itself or the verify after it failed.
            logging.warning('Failed to repair servo: %s', failure)
            autotest_stats.Counter(stat_name + 'FAILED').increment()
            failure_messages.append(str(failure))

    # Every repair method was tried and none left the host verified.
    total_failure_counter = autotest_stats.Counter(
            'servo_host_repair.Full_Repair_Failed')
    total_failure_counter.increment()
    summary = '\n'.join(failure_messages)
    raise ServoHostRepairTotalFailure(
            'All attempts at repairing the servo failed:\n%s' % summary)
719
720
def get_servo(self):
    """Return the servo object currently cached on this host.

    @return: the cached servo.Servo object, or None when the cache has
             been reset (`repair()` sets it to None).
    """
    cached_servo = self._servo
    return cached_servo
727
728
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      skip_host_up_check=False):
    """Create a ServoHost object for the servo attached to `dut`.

    The `servo_args` parameter is a dictionary specifying optional
    Servo client parameter overrides (i.e. a specific host or port).
    When specified, the caller requires that an exception be raised
    unless both the ServoHost and the Servo are successfully
    created.

    There are three possible cases:
    1. If the DUT is in the Cros test lab then the ServoHost object
       is only created for the host in the lab.  Alternate host or
       port settings in `servo_args` will be ignored.
    2. When not case 1., but `servo_args` is not `None`, then create
       a ServoHost object using `servo_args`.
    3. Otherwise, return `None`.

    When the `try_lab_servo` parameter is false, it indicates that a
    ServoHost should not be created for a device in the Cros test
    lab.  The setting of `servo_args` takes precedence over the
    setting of `try_lab_servo`.

    On Moblab the servo is never treated as a lab servo.  If the AFE
    host record for `dut` carries a servo host attribute, the host and
    port stored there replace the caller's `servo_args`; a ServoHost is
    still only created when the caller passed a non-None `servo_args`.

    @param dut: host name of the host that servo connects. It can be used to
                lookup the servo in test lab using naming convention.
    @param servo_args: A dictionary that contains args for creating
                       a ServoHost object,
                       e.g. {'servo_host': '172.11.11.111',
                             'servo_port': 9999}.
                       See comments above.
    @param try_lab_servo: Boolean. Whether to create ServoHost for a device
                          in test lab. See above.
    @param skip_host_up_check: True to skip the check of if servo host is
            pingable when creating the ServoHost object. This can be used when
            creating a servo host object to be repaired by PoE. Default is False

    @returns: A ServoHost object or None. See comments above.

    """
    # Capture the caller's intent now; `servo_args` may be replaced
    # below with values read from the AFE.
    required_by_test = servo_args is not None

    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
    else:
        is_moblab = utils.is_moblab()

    # Only a non-Moblab DUT that is addressed by hostname can turn out
    # to be in the lab; every other path keeps this default.
    is_in_lab = False
    if is_moblab:
        # Servos on Moblab are not in the actual lab.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        hosts = afe.get_hosts(hostname=dut)
        if hosts and SERVO_HOST_ATTR in hosts[0].attributes:
            host_attributes = hosts[0].attributes
            servo_args = {
                SERVO_HOST_ATTR: host_attributes[SERVO_HOST_ATTR],
                SERVO_PORT_ATTR: host_attributes.get(SERVO_PORT_ATTR, 9999),
            }
            # Inside a container a loopback address is swapped for the
            # configured `host_container_ip` value.
            points_at_loopback = (
                    utils.is_in_container() and
                    servo_args[SERVO_HOST_ATTR] in ('localhost', '127.0.0.1'))
            if points_at_loopback:
                servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
                        'SSP', 'host_container_ip', type=str, default=None)
    elif not dnsname_mangler.is_ip_address(dut):
        # The lab servo hostname is derived from the DUT hostname, so
        # this is only possible when `dut` is not an IP address.
        lab_servo_hostname = make_servo_hostname(dut)
        is_in_lab = utils.host_is_in_lab_zone(lab_servo_hostname)

    if not is_in_lab:
        if required_by_test:
            return ServoHost(required_by_test=True, is_in_lab=False,
                             **servo_args)
        return None

    # From here on `lab_servo_hostname` is always bound, because
    # `is_in_lab` can only be true on the branch that assigns it.
    servo_wanted = servo_args is not None or try_lab_servo
    if servo_wanted and (skip_host_up_check or
                         servo_host_is_up(lab_servo_hostname)):
        return ServoHost(servo_host=lab_servo_hostname, is_in_lab=is_in_lab,
                         required_by_test=required_by_test)
    return None
Dan Shibbb0cb62014-03-24 17:50:57 -0700805 return None