# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Shorthand for the global configuration object used throughout this module.
_CONFIG = global_config.global_config
# Boolean read from the 'CROS' config section (False when unset). ServoHost
# consults it to decide how the servod XML-RPC proxy is created.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)
Simran Basi0739d682015-02-25 16:22:56 -080044
class ServoHostException(error.AutoservError):
    """Base class for all exceptions raised by ServoHost."""
    pass
48
49
class ServoHostVerifyFailure(ServoHostException):
    """Raised when verification of the servo host or servod fails."""
    pass
53
54
class ServoHostRepairFailure(ServoHostException):
    """Raised when a single repair method fails to repair a servo host."""
    pass
58
59
class ServoHostRepairMethodNA(ServoHostException):
    """Raised when a repair method is not applicable to the servo host."""
    pass
63
64
class ServoHostRepairTotalFailure(ServoHostException):
    """Raised when every attempted repair method has failed."""
    pass
68
69
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Timeout passed as `down_timeout` when rebooting the servo host.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30
    # Name of the XML-RPC method used as the readiness test when the servod
    # proxy is created through the rpc server tracker.
    SERVO_READY_METHOD = 'get_version'

    # Number of failed wait_up attempts after which power cycling gives up.
    _MAX_POWER_CYCLE_ATTEMPTS = 3
    # Stats timer whose `decorate` wraps selected methods of this class.
    _timer = autotest_stats.Timer('servo_host')
Fang Dengd4fe7392013-09-20 12:18:21 -070089
Fang Deng5d518f42013-08-02 14:04:32 -070090
91 def _initialize(self, servo_host='localhost', servo_port=9999,
Dan Shi4d478522014-02-14 13:46:32 -080092 required_by_test=True, is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070093 """Initialize a ServoHost instance.
94
95 A ServoHost instance represents a host that controls a servo.
96
97 @param servo_host: Name of the host where the servod process
98 is running.
99 @param servo_port: Port the servod process is listening on.
Dan Shi4d478522014-02-14 13:46:32 -0800100 @param required_by_test: True if servo is required by test.
101 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
102 to None, for which utils.host_is_in_lab_zone will be
103 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700104
105 """
106 super(ServoHost, self)._initialize(hostname=servo_host,
107 *args, **dargs)
Dan Shi4d478522014-02-14 13:46:32 -0800108 if is_in_lab is None:
109 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
110 else:
111 self._is_in_lab = is_in_lab
Fang Deng5d518f42013-08-02 14:04:32 -0700112 self._is_localhost = (self.hostname == 'localhost')
Kevin Chengcdece6b2016-07-27 12:55:01 -0700113 self._servo_port = servo_port
xixuan6cf6d2f2016-01-29 15:29:00 -0800114
Fang Deng5d518f42013-08-02 14:04:32 -0700115 # Commands on the servo host must be run by the superuser. Our account
116 # on Beaglebone is root, but locally we might be running as a
117 # different user. If so - `sudo ' will have to be added to the
118 # commands.
119 if self._is_localhost:
120 self._sudo_required = utils.system_output('id -u') != '0'
121 else:
122 self._sudo_required = False
Dan Shi4d478522014-02-14 13:46:32 -0800123 # Create a cache of Servo object. This must be called at the end of
124 # _initialize to make sure all attributes are set.
125 self._servo = None
Dan Shi90466352015-09-22 15:01:05 -0700126 self.required_by_test = required_by_test
Dan Shi4d478522014-02-14 13:46:32 -0800127 try:
xixuan2b80c182016-03-28 11:59:30 -0700128 if ENABLE_SSH_TUNNEL_FOR_SERVO:
129 self._servod_server = self.rpc_server_tracker.xmlrpc_connect(
130 None, servo_port, ready_test_name=self.SERVO_READY_METHOD,
131 timeout_seconds=60)
132 else:
133 remote = 'http://%s:%s' % (self.hostname, servo_port)
134 self._servod_server = xmlrpclib.ServerProxy(remote)
Dan Shi4d478522014-02-14 13:46:32 -0800135 self.verify()
Alex Millercc589692014-04-21 18:00:22 -0700136 except Exception:
Dan Shibbb0cb62014-03-24 17:50:57 -0700137 if required_by_test:
138 if not self.is_in_lab():
139 raise
140 else:
J. Richard Barnettec2d99cf2015-11-18 12:46:15 -0800141 self.repair()
Fang Deng5d518f42013-08-02 14:04:32 -0700142
143
144 def is_in_lab(self):
145 """Check whether the servo host is a lab device.
146
147 @returns: True if the servo host is in Cros Lab, otherwise False.
148
149 """
150 return self._is_in_lab
151
152
153 def is_localhost(self):
154 """Checks whether the servo host points to localhost.
155
156 @returns: True if it points to localhost, otherwise False.
157
158 """
159 return self._is_localhost
160
161
162 def get_servod_server_proxy(self):
163 """Return a proxy that can be used to communicate with servod server.
164
165 @returns: An xmlrpclib.ServerProxy that is connected to the servod
166 server on the host.
167
168 """
169 return self._servod_server
170
171
172 def get_wait_up_processes(self):
173 """Get the list of local processes to wait for in wait_up.
174
175 Override get_wait_up_processes in
176 autotest_lib.client.common_lib.hosts.base_classes.Host.
177 Wait for servod process to go up. Called by base class when
178 rebooting the device.
179
180 """
181 processes = [self.SERVOD_PROCESS]
182 return processes
183
184
beeps5e8c45a2013-12-17 22:05:11 -0800185 def _is_cros_host(self):
186 """Check if a servo host is running chromeos.
187
188 @return: True if the servo host is running chromeos.
189 False if it isn't, or we don't have enough information.
190 """
191 try:
192 result = self.run('grep -q CHROMEOS /etc/lsb-release',
193 ignore_status=True, timeout=10)
194 except (error.AutoservRunError, error.AutoservSSHTimeout):
195 return False
196 return result.exit_status == 0
197
198
Fang Deng5d518f42013-08-02 14:04:32 -0700199 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
200 connect_timeout=None, alive_interval=None):
201 """Override default make_ssh_command to use tuned options.
202
203 Tuning changes:
204 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
205 connection failure. Consistency with remote_access.py.
206
207 - ServerAliveInterval=180; which causes SSH to ping connection every
208 180 seconds. In conjunction with ServerAliveCountMax ensures
209 that if the connection dies, Autotest will bail out quickly.
210
211 - ServerAliveCountMax=3; consistency with remote_access.py.
212
213 - ConnectAttempts=4; reduce flakiness in connection errors;
214 consistency with remote_access.py.
215
216 - UserKnownHostsFile=/dev/null; we don't care about the keys.
217
218 - SSH protocol forced to 2; needed for ServerAliveInterval.
219
220 @param user User name to use for the ssh connection.
221 @param port Port on the target host to use for ssh connection.
222 @param opts Additional options to the ssh command.
223 @param hosts_file Ignored.
224 @param connect_timeout Ignored.
225 @param alive_interval Ignored.
226
227 @returns: An ssh command with the requested settings.
228
229 """
230 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
231 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
232 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
233 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
234 ' -o Protocol=2 -l %s -p %d')
235 return base_command % (opts, user, port)
236
237
238 def _make_scp_cmd(self, sources, dest):
239 """Format scp command.
240
241 Given a list of source paths and a destination path, produces the
242 appropriate scp command for encoding it. Remote paths must be
243 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
244 to allow additional ssh options.
245
246 @param sources: A list of source paths to copy from.
247 @param dest: Destination path to copy to.
248
249 @returns: An scp command that copies |sources| on local machine to
250 |dest| on the remote servo host.
251
252 """
253 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
254 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
255 return command % (self.master_ssh_option,
256 self.port, ' '.join(sources), dest)
257
258
259 def run(self, command, timeout=3600, ignore_status=False,
260 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
261 connect_timeout=30, options='', stdin=None, verbose=True, args=()):
262 """Run a command on the servo host.
263
264 Extends method `run` in SSHHost. If the servo host is a remote device,
265 it will call `run` in SSHost without changing anything.
266 If the servo host is 'localhost', it will call utils.system_output.
267
268 @param command: The command line string.
269 @param timeout: Time limit in seconds before attempting to
270 kill the running process. The run() function
271 will take a few seconds longer than 'timeout'
272 to complete if it has to kill the process.
273 @param ignore_status: Do not raise an exception, no matter
274 what the exit code of the command is.
275 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
276 @param connect_timeout: SSH connection timeout (in seconds)
277 Ignored if host is 'localhost'.
278 @param options: String with additional ssh command options
279 Ignored if host is 'localhost'.
280 @param stdin: Stdin to pass (a string) to the executed command.
281 @param verbose: Log the commands.
282 @param args: Sequence of strings to pass as arguments to command by
283 quoting them in " and escaping their contents if necessary.
284
285 @returns: A utils.CmdResult object.
286
287 @raises AutoservRunError if the command failed.
288 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
289 when servo host is not 'localhost'.
290
291 """
292 run_args = {'command': command, 'timeout': timeout,
293 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
294 'stderr_tee': stderr_tee, 'stdin': stdin,
295 'verbose': verbose, 'args': args}
296 if self.is_localhost():
297 if self._sudo_required:
298 run_args['command'] = 'sudo -n %s' % command
299 try:
300 return utils.run(**run_args)
301 except error.CmdError as e:
302 logging.error(e)
303 raise error.AutoservRunError('command execution error',
304 e.result_obj)
305 else:
306 run_args['connect_timeout'] = connect_timeout
307 run_args['options'] = options
308 return super(ServoHost, self).run(**run_args)
309
310
Dan Shi33412a82014-06-10 15:12:27 -0700311 @_timer.decorate
Fang Deng5d518f42013-08-02 14:04:32 -0700312 def _check_servod(self):
313 """A sanity check of the servod state."""
314 msg_prefix = 'Servod error: %s'
315 error_msg = None
316 try:
317 timeout, _ = retry.timeout(
318 self._servod_server.get, args=('pwr_button', ),
319 timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
320 if timeout:
321 error_msg = msg_prefix % 'Request timed out.'
322 except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
323 error_msg = msg_prefix % e
324 if error_msg:
325 raise ServoHostVerifyFailure(error_msg)
326
327
Dan Shi33412a82014-06-10 15:12:27 -0700328 def _check_servo_config(self):
329 """Check if config file exists for servod.
330
331 If servod config file does not exist, there is no need to verify if
332 servo is working. The servo could be attached to a board not supported
333 yet.
334
335 @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.
336
337 """
Kevin Chengcdece6b2016-07-27 12:55:01 -0700338 if self._is_localhost or not self._is_cros_host():
339 logging.info('We will skip servo config check, either %s '
340 'is not running chromeos or we cannot find enough '
341 'information about the host.', self.hostname)
Simran Basi0739d682015-02-25 16:22:56 -0800342 return
Kevin Chengcdece6b2016-07-27 12:55:01 -0700343
344 failure_data = []
345 servod_config_file = '/var/lib/servod/config'
346 config_files = ['%s_%s' % (servod_config_file, self._servo_port),
347 servod_config_file]
348
349 # We'll need to check for two types of config files since we're
350 # transistioning to support a new servo setup and we need to keep both
351 # to enable successful reverts.
352 # TODO(kevcheng): We can get rid of checking for servod_config_file once
353 # the fleet of beaglebones all have new style config file.
354 for config_file in config_files:
355 try:
356 self.run('test -f %s' % config_file)
Ricky Liang86b80182014-06-13 14:39:42 +0800357 return
Kevin Chengcdece6b2016-07-27 12:55:01 -0700358 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
359 failure_data.append((config_file, e))
360
361 failure_message = ('Servo config file check failed for %s: ' %
362 self.hostname)
363 for data in failure_data:
364 failure_message += '%s (%s) ' % (data[0], data[1])
365 raise ServoHostVerifyFailure(failure_message)
Dan Shi33412a82014-06-10 15:12:27 -0700366
367
Dan Shie5b3c512014-08-21 12:12:09 -0700368 def _check_servod_status(self):
369 """Check if servod process is running.
370
371 If servod is not running, there is no need to verify if servo is
372 working. Check the process before making any servod call can avoid
373 long timeout that eventually fail any servod call.
374 If the servo host is set to localhost, failure of servod status check
375 will be ignored, as servo call may use ssh tunnel.
376
377 @raises ServoHostVerifyFailure if servod process does not exist.
378
379 """
380 try:
Dan Shi18040e42014-09-03 11:14:00 -0700381 pids = [str(int(s)) for s in
382 self.run('pgrep servod').stdout.strip().split('\n')]
383 logging.info('servod is running, PID=%s', ','.join(pids))
Dan Shie5b3c512014-08-21 12:12:09 -0700384 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
385 if self._is_localhost:
386 logging.info('Ignoring servod status check failure. servo host '
387 'is set to localhost, servo call may use ssh '
388 'tunnel to go through.')
389 else:
390 raise ServoHostVerifyFailure(
391 'Servod status check failed for %s: %s' %
392 (self.hostname, e))
393
394
Dan Shi0942b1d2015-03-31 11:07:00 -0700395 def get_release_version(self):
396 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
397
398 @returns The version string in lsb-release, under attribute
399 CHROMEOS_RELEASE_VERSION.
400 """
401 lsb_release_content = self.run(
402 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
403 return lsbrelease_utils.get_chromeos_release_version(
404 lsb_release_content=lsb_release_content)
405
406
Richard Barnette3a7697f2016-04-20 11:33:27 -0700407 def _check_for_reboot(self, updater):
408 """
409 Reboot this servo host if an upgrade is waiting.
410
411 If the host has successfully downloaded and finalized a new
412 build, reboot.
413
414 @param updater: a ChromiumOSUpdater instance for checking
415 whether reboot is needed.
416 @return Return a (status, build) tuple reflecting the
417 update_engine status and current build of the host
418 at the end of the call.
419 """
420 current_build_number = self.get_release_version()
421 status = updater.check_update_status()
422 if status == autoupdater.UPDATER_NEED_REBOOT:
423 logging.info('Rebooting beaglebone host %s from build %s',
424 self.hostname, current_build_number)
425 # Tell the reboot() call not to wait for completion.
426 # Otherwise, the call will log reboot failure if servo does
427 # not come back. The logged reboot failure will lead to
428 # test job failure. If the test does not require servo, we
429 # don't want servo failure to fail the test with error:
430 # `Host did not return from reboot` in status.log.
431 reboot_cmd = 'sleep 1 ; reboot & sleep 10; reboot -f',
Richard Barnetteab9769f2016-06-01 15:01:44 -0700432 self.reboot(reboot_cmd=reboot_cmd, fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700433
434 # We told the reboot() call not to wait, but we need to wait
435 # for the reboot before we continue. Alas. The code from
436 # here below is basically a copy of Host.wait_for_restart(),
437 # with the logging bits ripped out, so that they can't cause
438 # the failure logging problem described above.
439 #
440 # The black stain that this has left on my soul can never be
441 # erased.
442 old_boot_id = self.get_boot_id()
443 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
444 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
445 old_boot_id=old_boot_id):
446 raise error.AutoservHostError(
447 'servo host %s failed to shut down.' %
448 self.hostname)
449 if self.wait_up(timeout=120):
450 current_build_number = self.get_release_version()
451 status = updater.check_update_status()
452 logging.info('servo host %s back from reboot, with build %s',
453 self.hostname, current_build_number)
454 else:
455 raise error.AutoservHostError(
456 'servo host %s failed to come back from reboot.' %
457 self.hostname)
458 return status, current_build_number
459
460
beeps5e8c45a2013-12-17 22:05:11 -0800461 @_timer.decorate
Richard Barnette3a7697f2016-04-20 11:33:27 -0700462 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800463 """Update the image on the servo host, if needed.
464
J. Richard Barnette84895392015-04-30 12:31:01 -0700465 This method recognizes the following cases:
466 * If the Host is not running Chrome OS, do nothing.
467 * If a previously triggered update is now complete, reboot
468 to the new version.
469 * If the host is processing a previously triggered update,
470 do nothing.
471 * If the host is running a version of Chrome OS different
472 from the default for servo Hosts, trigger an update, but
473 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800474
Richard Barnette3a7697f2016-04-20 11:33:27 -0700475 @param wait_for_update If an update needs to be applied and
476 this is true, then don't return until the update is
477 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800478 @raises dev_server.DevServerException: If all the devservers are down.
479 @raises site_utils.ParseBuildNameException: If the devserver returns
480 an invalid build name.
481 @raises autoupdater.ChromiumOSError: If something goes wrong in the
482 checking update engine client status or applying an update.
483 @raises AutoservRunError: If the update_engine_client isn't present on
484 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700485
beeps5e8c45a2013-12-17 22:05:11 -0800486 """
Dan Shib795b5a2015-09-24 13:26:35 -0700487 # servod could be running in a Ubuntu workstation.
beeps5e8c45a2013-12-17 22:05:11 -0800488 if not self._is_cros_host():
489 logging.info('Not attempting an update, either %s is not running '
490 'chromeos or we cannot find enough information about '
491 'the host.', self.hostname)
492 return
493
Dan Shib795b5a2015-09-24 13:26:35 -0700494 if lsbrelease_utils.is_moblab():
495 logging.info('Not attempting an update, %s is running moblab.',
496 self.hostname)
497 return
498
Richard Barnette3a7697f2016-04-20 11:33:27 -0700499 board = _CONFIG.get_config_value('CROS', 'servo_board')
J. Richard Barnette84895392015-04-30 12:31:01 -0700500 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
501 target_version = afe.run('get_stable_version', board=board)
Dan Shi3b2adf62015-09-02 17:46:54 -0700502 build_pattern = _CONFIG.get_config_value(
J. Richard Barnette84895392015-04-30 12:31:01 -0700503 'CROS', 'stable_build_pattern')
504 target_build = build_pattern % (board, target_version)
505 target_build_number = server_site_utils.ParseBuildName(
506 target_build)[3]
beeps5e8c45a2013-12-17 22:05:11 -0800507 ds = dev_server.ImageServer.resolve(self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700508 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800509
510 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700511 status, current_build_number = self._check_for_reboot(updater)
512 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800513 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
514 logging.info('servo host %s already processing an update, update '
515 'engine client status=%s', self.hostname, status)
J. Richard Barnette84895392015-04-30 12:31:01 -0700516 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800517 logging.info('Using devserver url: %s to trigger update on '
518 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700519 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800520 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700521 ds.stage_artifacts(target_build,
522 artifacts=['full_payload'])
523 except Exception as e:
524 logging.error('Staging artifacts failed: %s', str(e))
525 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800526 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700527 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700528 # TODO(jrbarnette): This 'touch' is a gross hack
529 # to get us past crbug.com/613603. Once that
530 # bug is resolved, we should remove this code.
531 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700532 updater.trigger_update()
533 except autoupdater.RootFSUpdateError as e:
534 trigger_download_status = 'failed with %s' % str(e)
535 autotest_stats.Counter(
536 'servo_host.RootFSUpdateError').increment()
537 else:
538 trigger_download_status = 'passed'
539 logging.info('Triggered download and update %s for %s, '
540 'update engine currently in status %s',
541 trigger_download_status, self.hostname,
542 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800543 else:
544 logging.info('servo host %s does not require an update.',
545 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700546 update_pending = False
547
548 if update_pending and wait_for_update:
549 logging.info('Waiting for servo update to complete.')
550 self.run('update_engine_client --follow', ignore_status=True)
551 status, current_build_number = self._check_for_reboot(updater)
552 if (status != autoupdater.UPDATER_IDLE or
553 current_build_number != target_build_number):
554 logging.error('Update failed; status: %s, '
555 'actual build: %s',
556 status, current_build_number)
557 message = ('Servo host failed to update from %s to %s' %
558 (current_build_number, target_build_number))
559 raise error.AutoservHostError(message)
beeps5e8c45a2013-12-17 22:05:11 -0800560
561
Fang Deng5d518f42013-08-02 14:04:32 -0700562 def verify_software(self):
beeps5e8c45a2013-12-17 22:05:11 -0800563 """Update the servo host and verify it's in a good state.
Fang Deng5d518f42013-08-02 14:04:32 -0700564
565 It overrides the base class function for verify_software.
beeps5e8c45a2013-12-17 22:05:11 -0800566 If an update is available, downloads and applies it. Then verifies:
Fang Deng5d518f42013-08-02 14:04:32 -0700567 1) Whether basic servo command can run successfully.
568 2) Whether USB is in a good state. crbug.com/225932
569
570 @raises ServoHostVerifyFailure if servo host does not pass the checks.
571
572 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700573 # TODO(jrbarnette) Old versions of beaglebone_servo include
574 # the powerd package. In some (not yet understood)
575 # circumstances, powerd on beaglebone will shut down after
576 # attempting to suspend. Current versions of
577 # beaglebone_servo don't have powerd, but until we can purge
578 # the lab of the old images, we need to make sure powerd
579 # isn't running.
580 self.run('stop powerd', ignore_status=True)
581
beeps5e8c45a2013-12-17 22:05:11 -0800582 logging.info('Applying an update to the servo host, if necessary.')
Richard Barnette3a7697f2016-04-20 11:33:27 -0700583 self.update_image(wait_for_update=False)
Dan Shi33412a82014-06-10 15:12:27 -0700584 self._check_servo_config()
Dan Shie5b3c512014-08-21 12:12:09 -0700585 self._check_servod_status()
586
Dan Shi4d478522014-02-14 13:46:32 -0800587 # If servo is already initialized, we don't need to do it again, call
588 # _check_servod should be enough.
589 if self._servo:
590 self._check_servod()
591 else:
592 self._servo = servo.Servo(servo_host=self)
Dan Shie5b3c512014-08-21 12:12:09 -0700593 timeout, _ = retry.timeout(
594 self._servo.initialize_dut,
595 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
596 if timeout:
597 raise ServoHostVerifyFailure('Servo initialize timed out.')
Fang Deng5d518f42013-08-02 14:04:32 -0700598 logging.info('Sanity checks pass on servo host %s', self.hostname)
599
600
601 def _repair_with_sysrq_reboot(self):
602 """Reboot with magic SysRq key."""
603 self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
604 down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
Richard Barnetteab9769f2016-06-01 15:01:44 -0700605 reboot_cmd='echo "b" > /proc/sysrq-trigger',
Fang Deng5d518f42013-08-02 14:04:32 -0700606 fastsync=True)
607 time.sleep(self.REBOOT_DELAY_SECS)
608
609
Fang Dengd4fe7392013-09-20 12:18:21 -0700610 def has_power(self):
611 """Return whether or not the servo host is powered by PoE."""
612 # TODO(fdeng): See crbug.com/302791
613 # For now, assume all servo hosts in the lab have power.
614 return self.is_in_lab()
615
616
617 def power_cycle(self):
618 """Cycle power to this host via PoE if it is a lab device.
619
620 @raises ServoHostRepairFailure if it fails to power cycle the
621 servo host.
622
623 """
624 if self.has_power():
625 try:
626 rpm_client.set_power(self.hostname, 'CYCLE')
627 except (socket.error, xmlrpclib.Error,
628 httplib.BadStatusLine,
629 rpm_client.RemotePowerException) as e:
630 raise ServoHostRepairFailure(
631 'Power cycling %s failed: %s' % (self.hostname, e))
632 else:
633 logging.info('Skipping power cycling, not a lab device.')
634
635
Fang Deng5d518f42013-08-02 14:04:32 -0700636 def _powercycle_to_repair(self):
Fang Dengd4fe7392013-09-20 12:18:21 -0700637 """Power cycle the servo host using PoE.
638
639 @raises ServoHostRepairFailure if it fails to fix the servo host.
Fang Dengf0ea6142013-10-10 21:43:16 -0700640 @raises ServoHostRepairMethodNA if it does not support power.
Fang Dengd4fe7392013-09-20 12:18:21 -0700641
642 """
643 if not self.has_power():
Fang Dengf0ea6142013-10-10 21:43:16 -0700644 raise ServoHostRepairMethodNA('%s does not support power.' %
645 self.hostname)
Fang Dengd4fe7392013-09-20 12:18:21 -0700646 logging.info('Attempting repair via PoE powercycle.')
647 failed_cycles = 0
648 self.power_cycle()
649 while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
650 failed_cycles += 1
651 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
652 raise ServoHostRepairFailure(
653 'Powercycled host %s %d times; device did not come back'
654 ' online.' % (self.hostname, failed_cycles))
655 self.power_cycle()
656 logging.info('Powercycling was successful after %d failures.',
657 failed_cycles)
658 # Allow some time for servod to get started.
659 time.sleep(self.REBOOT_DELAY_SECS)
Fang Deng5d518f42013-08-02 14:04:32 -0700660
661
J. Richard Barnettec2d99cf2015-11-18 12:46:15 -0800662 def repair(self):
Fang Deng5d518f42013-08-02 14:04:32 -0700663 """Attempt to repair servo host.
664
665 This overrides the base class function for repair.
666 Note if the host is not in Cros Lab, the repair procedure
667 will be skipped.
668
669 @raises ServoHostRepairTotalFailure if all attempts fail.
670
671 """
672 if not self.is_in_lab():
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -0700673 logging.warning('Skip repairing servo host %s: Not a lab device.',
Fang Deng5d518f42013-08-02 14:04:32 -0700674 self.hostname)
675 return
676 logging.info('Attempting to repair servo host %s.', self.hostname)
Dan Shi4d478522014-02-14 13:46:32 -0800677 # Reset the cache to guarantee servo initialization being called later.
678 self._servo = None
Tom Wai-Hong Tam0635dce2016-06-02 02:17:50 +0800679 repair_funcs = [self._repair_with_sysrq_reboot,
680 self._powercycle_to_repair]
Fang Deng5d518f42013-08-02 14:04:32 -0700681 errors = []
682 for repair_func in repair_funcs:
Fang Dengf0ea6142013-10-10 21:43:16 -0700683 counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
Fang Deng5d518f42013-08-02 14:04:32 -0700684 try:
685 repair_func()
686 self.verify()
Gabe Black1e1c41b2015-02-04 23:55:15 -0800687 autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
Fang Deng5d518f42013-08-02 14:04:32 -0700688 return
Fang Dengf0ea6142013-10-10 21:43:16 -0700689 except ServoHostRepairMethodNA as e:
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -0700690 logging.warning('Repair method NA: %s', e)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800691 autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
Fang Dengf0ea6142013-10-10 21:43:16 -0700692 errors.append(str(e))
Fang Deng5d518f42013-08-02 14:04:32 -0700693 except Exception as e:
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -0700694 logging.warning('Failed to repair servo: %s', e)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800695 autotest_stats.Counter(counter_prefix + 'FAILED').increment()
Fang Deng5d518f42013-08-02 14:04:32 -0700696 errors.append(str(e))
Gabe Black1e1c41b2015-02-04 23:55:15 -0800697 autotest_stats.Counter('servo_host_repair.Full_Repair_Failed'). \
698 increment()
Fang Deng5d518f42013-08-02 14:04:32 -0700699 raise ServoHostRepairTotalFailure(
700 'All attempts at repairing the servo failed:\n%s' %
701 '\n'.join(errors))
702
703
def get_servo(self):
    """Return the servo object cached on this host.

    The value is whatever is currently stored in `self._servo`; no
    initialization is attempted here.

    @return: the cached servo.Servo object (the cache may hold None,
             e.g. `repair()` in this class resets it to None).
    """
    cached_servo = self._servo
    return cached_servo
710
711
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label of
    the name; any remaining labels are kept unchanged, e.g.
    'host1.cros' becomes 'host1-servo.cros'.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # Split only at the first dot: `separator` and `domain` are empty
    # strings when the name has no dot at all.
    short_name, separator, domain = dut_hostname.partition('.')
    return short_name + '-servo' + separator + domain
723
724
def servo_host_is_up(servo_hostname):
    """
    Ping a servo host and report whether it answered.

    @param servo_hostname: hostname of the servo host.

    @return True if the ping result reports `received > 0`, False
            otherwise.
    """
    # This overlaps with the SSH ping performed early in the servo proxy
    # initialization code.  It is done anyway because a failing ping
    # finishes within a couple of seconds, whereas an SSH ping needs
    # about 60 seconds to time out.  That slow timeout is hit when the
    # servo DNS name resolves but no host is present at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    # Ping failures are tolerated (ignore_result/ignore_status) so that
    # the outcome can be read from the returned result instead.
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    runner = ping_runner.PingRunner()
    result = runner.ping(config)
    return result.received > 0
743
744
def _get_standard_servo_args(dut_host):
    """
    Look up the servo settings recorded for a DUT.

    The servo host (and optionally port) are read from the
    `_afe_host.attributes` mapping of `dut_host`.  When no servo host
    attribute is present, a servo hostname derived from the DUT
    hostname is used as a fallback, but only if it is in the lab zone.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple `(servo_args, is_in_lab)`.  `servo_args` is a dict
            with the servo host and an optional port, or `None` if no
            servo could be determined.  `is_in_lab` indicates whether
            the servo is in the CrOS test lab, or some different
            environment.
    """
    # Inside a container the moblab setting comes from the SSP section
    # of the config; otherwise it is detected via utils.
    in_container = utils.is_in_container()
    if in_container:
        on_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
    else:
        on_moblab = utils.is_moblab()
    ssp_moblab = on_moblab if in_container else False

    afe_attrs = dut_host._afe_host.attributes
    if afe_attrs and SERVO_HOST_ATTR in afe_attrs:
        host = afe_attrs[SERVO_HOST_ATTR]
        # A loopback servo address is replaced with the configured
        # host container IP when running in an SSP moblab container.
        # NOTE(review): the config default is None, so `host` may end
        # up None here if 'host_container_ip' is not configured.
        if ssp_moblab and host in ('localhost', '127.0.0.1'):
            host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        found_args = {SERVO_HOST_ATTR: host}
        if SERVO_PORT_ATTR in afe_attrs:
            found_args[SERVO_PORT_ATTR] = afe_attrs[SERVO_PORT_ATTR]
        in_lab = not on_moblab and utils.host_is_in_lab_zone(host)
        return found_args, in_lab

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    if on_moblab or dnsname_mangler.is_ip_address(dut_host.hostname):
        return None, False
    host = make_servo_hostname(dut_host.hostname)
    in_lab = utils.host_is_in_lab_zone(host)
    # The derived hostname is only used when it is in the lab zone.
    found_args = {SERVO_HOST_ATTR: host} if in_lab else None
    return found_args, in_lab
790
791
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      skip_host_up_check=False):
    """
    Build a ServoHost for the servo attached to `dut`, when warranted.

    Outcomes:
      * No servo parameters can be determined: `None` is returned.
      * Parameters are known and creation is warranted: a `ServoHost`
        is returned.
      * Parameters are known but creation is not warranted: `None` is
        returned.

    Where the servo parameters (host name and port) come from, highest
    priority first:
      * Whatever `_get_standard_servo_args(dut)` returns, i.e. the servo
        attributes of the DUT or, failing that, a lab servo hostname
        derived from the DUT hostname.  This lookup only happens when
        `try_lab_servo` is true or `servo_args` is not `None`.
      * The `servo_args` dict passed in by the caller.

    Once parameters are known, the host object is created if any of
    the following holds:
      * `servo_args` was not `None` on entry.
      * `skip_host_up_check` is true.
      * The servo host responds to ping.

    The `ServoHost` is constructed with `required_by_test` set when the
    caller supplied `servo_args`.  Per the existing documentation of
    this function, the host is checked via `verify()` at creation and
    failures are ignored unless `servo_args` was supplied; in that case
    a repair is attempted for lab servos and an unrepaired `verify()`
    exception propagates to the caller.  That logic is in `ServoHost`,
    not in this function.

    @param dut                 An instance of `Host` from which to take
                               servo parameters (if available).
    @param servo_args          A dictionary with servo parameters to use
                               if they can't be found from `dut`.
                               Supplying it marks the servo as required
                               by the caller.
    @param try_lab_servo       If not true, servo host creation will be
                               skipped unless otherwise required by the
                               caller.
    @param skip_host_up_check  If true, do not check whether the host
                               responds to ping.

    @returns: A ServoHost object or None. See comments above.

    """
    # Must be captured before `servo_args` is possibly overridden below.
    caller_supplied_args = servo_args is not None
    in_lab = False
    if caller_supplied_args or try_lab_servo:
        standard_args, in_lab = _get_standard_servo_args(dut)
        # Parameters found for the DUT win over the caller's dict.
        if standard_args is not None:
            servo_args = standard_args

    if servo_args is None:
        return None

    # The ping is only needed when nothing forces creation.
    must_create = caller_supplied_args or skip_host_up_check
    if not must_create and not servo_host_is_up(servo_args[SERVO_HOST_ATTR]):
        return None

    return ServoHost(required_by_test=caller_supplied_args,
                     is_in_lab=in_lab, **servo_args)