blob: 617feae5ebbd57fa16c4d49cd5ca2a9c8a102b6a [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Default value of the `servo_port` argument of ServoHost._initialize().
DEFAULT_PORT = 9999

_CONFIG = global_config.global_config
# When true, ServoHost._initialize() obtains its servod proxy through
# rpc_server_tracker.xmlrpc_connect() instead of building an
# xmlrpclib.ServerProxy directly.  Defaults to False when the config value
# is absent.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)
Simran Basi0739d682015-02-25 16:22:56 -080046
class ServoHostException(error.AutoservError):
    """Common base class of every exception raised by ServoHost."""
50
51
class ServoHostVerifyFailure(ServoHostException):
    """Signals that verification of the servo host or servod failed."""
55
56
class ServoHostRepairFailure(ServoHostException):
    """Signals that one repair method could not fix the servo host."""
60
61
class ServoHostRepairMethodNA(ServoHostException):
    """Signals that a repair method does not apply to this servo host."""
65
66
class ServoHostRepairTotalFailure(ServoHostException):
    """Signals that every available repair method was tried and failed."""
70
71
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Passed as `down_timeout` to reboot() by the SysRq repair method.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30
    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    # Number of failed wait_up() attempts after which
    # _powercycle_to_repair() gives up.
    _MAX_POWER_CYCLE_ATTEMPTS = 3
    # Timer whose decorate() wraps _check_servod() and update_image().
    _timer = autotest_stats.Timer('servo_host')
Fang Dengd4fe7392013-09-20 12:18:21 -070091
Fang Deng5d518f42013-08-02 14:04:32 -070092
Richard Barnette17bfc6c2016-08-04 18:41:43 -070093 def _initialize(self, servo_host='localhost',
94 servo_port=DEFAULT_PORT, required_by_test=True,
95 is_in_lab=None, *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -070096 """Initialize a ServoHost instance.
97
98 A ServoHost instance represents a host that controls a servo.
99
100 @param servo_host: Name of the host where the servod process
101 is running.
102 @param servo_port: Port the servod process is listening on.
Dan Shi4d478522014-02-14 13:46:32 -0800103 @param required_by_test: True if servo is required by test.
104 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
105 to None, for which utils.host_is_in_lab_zone will be
106 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700107
108 """
109 super(ServoHost, self)._initialize(hostname=servo_host,
110 *args, **dargs)
Dan Shi4d478522014-02-14 13:46:32 -0800111 if is_in_lab is None:
112 self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
113 else:
114 self._is_in_lab = is_in_lab
Fang Deng5d518f42013-08-02 14:04:32 -0700115 self._is_localhost = (self.hostname == 'localhost')
Kevin Chengcdece6b2016-07-27 12:55:01 -0700116 self._servo_port = servo_port
xixuan6cf6d2f2016-01-29 15:29:00 -0800117
Fang Deng5d518f42013-08-02 14:04:32 -0700118 # Commands on the servo host must be run by the superuser. Our account
119 # on Beaglebone is root, but locally we might be running as a
120 # different user. If so - `sudo ' will have to be added to the
121 # commands.
122 if self._is_localhost:
123 self._sudo_required = utils.system_output('id -u') != '0'
124 else:
125 self._sudo_required = False
Dan Shi4d478522014-02-14 13:46:32 -0800126 # Create a cache of Servo object. This must be called at the end of
127 # _initialize to make sure all attributes are set.
128 self._servo = None
Dan Shi90466352015-09-22 15:01:05 -0700129 self.required_by_test = required_by_test
Dan Shi4d478522014-02-14 13:46:32 -0800130 try:
xixuan2b80c182016-03-28 11:59:30 -0700131 if ENABLE_SSH_TUNNEL_FOR_SERVO:
132 self._servod_server = self.rpc_server_tracker.xmlrpc_connect(
133 None, servo_port, ready_test_name=self.SERVO_READY_METHOD,
134 timeout_seconds=60)
135 else:
136 remote = 'http://%s:%s' % (self.hostname, servo_port)
137 self._servod_server = xmlrpclib.ServerProxy(remote)
Dan Shi4d478522014-02-14 13:46:32 -0800138 self.verify()
Alex Millercc589692014-04-21 18:00:22 -0700139 except Exception:
Dan Shibbb0cb62014-03-24 17:50:57 -0700140 if required_by_test:
141 if not self.is_in_lab():
142 raise
143 else:
J. Richard Barnettec2d99cf2015-11-18 12:46:15 -0800144 self.repair()
Fang Deng5d518f42013-08-02 14:04:32 -0700145
146
147 def is_in_lab(self):
148 """Check whether the servo host is a lab device.
149
150 @returns: True if the servo host is in Cros Lab, otherwise False.
151
152 """
153 return self._is_in_lab
154
155
156 def is_localhost(self):
157 """Checks whether the servo host points to localhost.
158
159 @returns: True if it points to localhost, otherwise False.
160
161 """
162 return self._is_localhost
163
164
165 def get_servod_server_proxy(self):
166 """Return a proxy that can be used to communicate with servod server.
167
168 @returns: An xmlrpclib.ServerProxy that is connected to the servod
169 server on the host.
170
171 """
172 return self._servod_server
173
174
175 def get_wait_up_processes(self):
176 """Get the list of local processes to wait for in wait_up.
177
178 Override get_wait_up_processes in
179 autotest_lib.client.common_lib.hosts.base_classes.Host.
180 Wait for servod process to go up. Called by base class when
181 rebooting the device.
182
183 """
184 processes = [self.SERVOD_PROCESS]
185 return processes
186
187
beeps5e8c45a2013-12-17 22:05:11 -0800188 def _is_cros_host(self):
189 """Check if a servo host is running chromeos.
190
191 @return: True if the servo host is running chromeos.
192 False if it isn't, or we don't have enough information.
193 """
194 try:
195 result = self.run('grep -q CHROMEOS /etc/lsb-release',
196 ignore_status=True, timeout=10)
197 except (error.AutoservRunError, error.AutoservSSHTimeout):
198 return False
199 return result.exit_status == 0
200
201
Fang Deng5d518f42013-08-02 14:04:32 -0700202 def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
203 connect_timeout=None, alive_interval=None):
204 """Override default make_ssh_command to use tuned options.
205
206 Tuning changes:
207 - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
208 connection failure. Consistency with remote_access.py.
209
210 - ServerAliveInterval=180; which causes SSH to ping connection every
211 180 seconds. In conjunction with ServerAliveCountMax ensures
212 that if the connection dies, Autotest will bail out quickly.
213
214 - ServerAliveCountMax=3; consistency with remote_access.py.
215
216 - ConnectAttempts=4; reduce flakiness in connection errors;
217 consistency with remote_access.py.
218
219 - UserKnownHostsFile=/dev/null; we don't care about the keys.
220
221 - SSH protocol forced to 2; needed for ServerAliveInterval.
222
223 @param user User name to use for the ssh connection.
224 @param port Port on the target host to use for ssh connection.
225 @param opts Additional options to the ssh command.
226 @param hosts_file Ignored.
227 @param connect_timeout Ignored.
228 @param alive_interval Ignored.
229
230 @returns: An ssh command with the requested settings.
231
232 """
233 base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
234 ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
235 ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
236 ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
237 ' -o Protocol=2 -l %s -p %d')
238 return base_command % (opts, user, port)
239
240
241 def _make_scp_cmd(self, sources, dest):
242 """Format scp command.
243
244 Given a list of source paths and a destination path, produces the
245 appropriate scp command for encoding it. Remote paths must be
246 pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
247 to allow additional ssh options.
248
249 @param sources: A list of source paths to copy from.
250 @param dest: Destination path to copy to.
251
252 @returns: An scp command that copies |sources| on local machine to
253 |dest| on the remote servo host.
254
255 """
256 command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
257 '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
258 return command % (self.master_ssh_option,
259 self.port, ' '.join(sources), dest)
260
261
262 def run(self, command, timeout=3600, ignore_status=False,
263 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
264 connect_timeout=30, options='', stdin=None, verbose=True, args=()):
265 """Run a command on the servo host.
266
267 Extends method `run` in SSHHost. If the servo host is a remote device,
268 it will call `run` in SSHost without changing anything.
269 If the servo host is 'localhost', it will call utils.system_output.
270
271 @param command: The command line string.
272 @param timeout: Time limit in seconds before attempting to
273 kill the running process. The run() function
274 will take a few seconds longer than 'timeout'
275 to complete if it has to kill the process.
276 @param ignore_status: Do not raise an exception, no matter
277 what the exit code of the command is.
278 @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
279 @param connect_timeout: SSH connection timeout (in seconds)
280 Ignored if host is 'localhost'.
281 @param options: String with additional ssh command options
282 Ignored if host is 'localhost'.
283 @param stdin: Stdin to pass (a string) to the executed command.
284 @param verbose: Log the commands.
285 @param args: Sequence of strings to pass as arguments to command by
286 quoting them in " and escaping their contents if necessary.
287
288 @returns: A utils.CmdResult object.
289
290 @raises AutoservRunError if the command failed.
291 @raises AutoservSSHTimeout SSH connection has timed out. Only applies
292 when servo host is not 'localhost'.
293
294 """
295 run_args = {'command': command, 'timeout': timeout,
296 'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
297 'stderr_tee': stderr_tee, 'stdin': stdin,
298 'verbose': verbose, 'args': args}
299 if self.is_localhost():
300 if self._sudo_required:
301 run_args['command'] = 'sudo -n %s' % command
302 try:
303 return utils.run(**run_args)
304 except error.CmdError as e:
305 logging.error(e)
306 raise error.AutoservRunError('command execution error',
307 e.result_obj)
308 else:
309 run_args['connect_timeout'] = connect_timeout
310 run_args['options'] = options
311 return super(ServoHost, self).run(**run_args)
312
313
Dan Shi33412a82014-06-10 15:12:27 -0700314 @_timer.decorate
Fang Deng5d518f42013-08-02 14:04:32 -0700315 def _check_servod(self):
316 """A sanity check of the servod state."""
317 msg_prefix = 'Servod error: %s'
318 error_msg = None
319 try:
320 timeout, _ = retry.timeout(
321 self._servod_server.get, args=('pwr_button', ),
322 timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
323 if timeout:
324 error_msg = msg_prefix % 'Request timed out.'
325 except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
326 error_msg = msg_prefix % e
327 if error_msg:
328 raise ServoHostVerifyFailure(error_msg)
329
330
Dan Shi33412a82014-06-10 15:12:27 -0700331 def _check_servo_config(self):
332 """Check if config file exists for servod.
333
334 If servod config file does not exist, there is no need to verify if
335 servo is working. The servo could be attached to a board not supported
336 yet.
337
338 @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.
339
340 """
Kevin Chengcdece6b2016-07-27 12:55:01 -0700341 if self._is_localhost or not self._is_cros_host():
342 logging.info('We will skip servo config check, either %s '
343 'is not running chromeos or we cannot find enough '
344 'information about the host.', self.hostname)
Simran Basi0739d682015-02-25 16:22:56 -0800345 return
Kevin Chengcdece6b2016-07-27 12:55:01 -0700346
347 failure_data = []
348 servod_config_file = '/var/lib/servod/config'
349 config_files = ['%s_%s' % (servod_config_file, self._servo_port),
350 servod_config_file]
351
352 # We'll need to check for two types of config files since we're
353 # transistioning to support a new servo setup and we need to keep both
354 # to enable successful reverts.
355 # TODO(kevcheng): We can get rid of checking for servod_config_file once
356 # the fleet of beaglebones all have new style config file.
357 for config_file in config_files:
358 try:
359 self.run('test -f %s' % config_file)
Ricky Liang86b80182014-06-13 14:39:42 +0800360 return
Kevin Chengcdece6b2016-07-27 12:55:01 -0700361 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
362 failure_data.append((config_file, e))
363
364 failure_message = ('Servo config file check failed for %s: ' %
365 self.hostname)
366 for data in failure_data:
367 failure_message += '%s (%s) ' % (data[0], data[1])
368 raise ServoHostVerifyFailure(failure_message)
Dan Shi33412a82014-06-10 15:12:27 -0700369
370
Dan Shie5b3c512014-08-21 12:12:09 -0700371 def _check_servod_status(self):
372 """Check if servod process is running.
373
374 If servod is not running, there is no need to verify if servo is
375 working. Check the process before making any servod call can avoid
376 long timeout that eventually fail any servod call.
377 If the servo host is set to localhost, failure of servod status check
378 will be ignored, as servo call may use ssh tunnel.
379
380 @raises ServoHostVerifyFailure if servod process does not exist.
381
382 """
383 try:
Dan Shi18040e42014-09-03 11:14:00 -0700384 pids = [str(int(s)) for s in
385 self.run('pgrep servod').stdout.strip().split('\n')]
386 logging.info('servod is running, PID=%s', ','.join(pids))
Dan Shie5b3c512014-08-21 12:12:09 -0700387 except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
388 if self._is_localhost:
389 logging.info('Ignoring servod status check failure. servo host '
390 'is set to localhost, servo call may use ssh '
391 'tunnel to go through.')
392 else:
393 raise ServoHostVerifyFailure(
394 'Servod status check failed for %s: %s' %
395 (self.hostname, e))
396
397
Dan Shi0942b1d2015-03-31 11:07:00 -0700398 def get_release_version(self):
399 """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
400
401 @returns The version string in lsb-release, under attribute
402 CHROMEOS_RELEASE_VERSION.
403 """
404 lsb_release_content = self.run(
405 'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
406 return lsbrelease_utils.get_chromeos_release_version(
407 lsb_release_content=lsb_release_content)
408
409
Richard Barnette3a7697f2016-04-20 11:33:27 -0700410 def _check_for_reboot(self, updater):
411 """
412 Reboot this servo host if an upgrade is waiting.
413
414 If the host has successfully downloaded and finalized a new
415 build, reboot.
416
417 @param updater: a ChromiumOSUpdater instance for checking
418 whether reboot is needed.
419 @return Return a (status, build) tuple reflecting the
420 update_engine status and current build of the host
421 at the end of the call.
422 """
423 current_build_number = self.get_release_version()
424 status = updater.check_update_status()
425 if status == autoupdater.UPDATER_NEED_REBOOT:
426 logging.info('Rebooting beaglebone host %s from build %s',
427 self.hostname, current_build_number)
428 # Tell the reboot() call not to wait for completion.
429 # Otherwise, the call will log reboot failure if servo does
430 # not come back. The logged reboot failure will lead to
431 # test job failure. If the test does not require servo, we
432 # don't want servo failure to fail the test with error:
433 # `Host did not return from reboot` in status.log.
434 reboot_cmd = 'sleep 1 ; reboot & sleep 10; reboot -f',
Richard Barnetteab9769f2016-06-01 15:01:44 -0700435 self.reboot(reboot_cmd=reboot_cmd, fastsync=True, wait=False)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700436
437 # We told the reboot() call not to wait, but we need to wait
438 # for the reboot before we continue. Alas. The code from
439 # here below is basically a copy of Host.wait_for_restart(),
440 # with the logging bits ripped out, so that they can't cause
441 # the failure logging problem described above.
442 #
443 # The black stain that this has left on my soul can never be
444 # erased.
445 old_boot_id = self.get_boot_id()
446 if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
447 warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
448 old_boot_id=old_boot_id):
449 raise error.AutoservHostError(
450 'servo host %s failed to shut down.' %
451 self.hostname)
452 if self.wait_up(timeout=120):
453 current_build_number = self.get_release_version()
454 status = updater.check_update_status()
455 logging.info('servo host %s back from reboot, with build %s',
456 self.hostname, current_build_number)
457 else:
458 raise error.AutoservHostError(
459 'servo host %s failed to come back from reboot.' %
460 self.hostname)
461 return status, current_build_number
462
463
beeps5e8c45a2013-12-17 22:05:11 -0800464 @_timer.decorate
Richard Barnette3a7697f2016-04-20 11:33:27 -0700465 def update_image(self, wait_for_update=False):
beeps5e8c45a2013-12-17 22:05:11 -0800466 """Update the image on the servo host, if needed.
467
J. Richard Barnette84895392015-04-30 12:31:01 -0700468 This method recognizes the following cases:
469 * If the Host is not running Chrome OS, do nothing.
470 * If a previously triggered update is now complete, reboot
471 to the new version.
472 * If the host is processing a previously triggered update,
473 do nothing.
474 * If the host is running a version of Chrome OS different
475 from the default for servo Hosts, trigger an update, but
476 don't wait for it to complete.
beeps5e8c45a2013-12-17 22:05:11 -0800477
Richard Barnette3a7697f2016-04-20 11:33:27 -0700478 @param wait_for_update If an update needs to be applied and
479 this is true, then don't return until the update is
480 downloaded and finalized, and the host rebooted.
beeps5e8c45a2013-12-17 22:05:11 -0800481 @raises dev_server.DevServerException: If all the devservers are down.
482 @raises site_utils.ParseBuildNameException: If the devserver returns
483 an invalid build name.
484 @raises autoupdater.ChromiumOSError: If something goes wrong in the
485 checking update engine client status or applying an update.
486 @raises AutoservRunError: If the update_engine_client isn't present on
487 the host, and the host is a cros_host.
J. Richard Barnette84895392015-04-30 12:31:01 -0700488
beeps5e8c45a2013-12-17 22:05:11 -0800489 """
Dan Shib795b5a2015-09-24 13:26:35 -0700490 # servod could be running in a Ubuntu workstation.
beeps5e8c45a2013-12-17 22:05:11 -0800491 if not self._is_cros_host():
492 logging.info('Not attempting an update, either %s is not running '
493 'chromeos or we cannot find enough information about '
494 'the host.', self.hostname)
495 return
496
Dan Shib795b5a2015-09-24 13:26:35 -0700497 if lsbrelease_utils.is_moblab():
498 logging.info('Not attempting an update, %s is running moblab.',
499 self.hostname)
500 return
501
Richard Barnette3a7697f2016-04-20 11:33:27 -0700502 board = _CONFIG.get_config_value('CROS', 'servo_board')
J. Richard Barnette84895392015-04-30 12:31:01 -0700503 afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
504 target_version = afe.run('get_stable_version', board=board)
Dan Shi3b2adf62015-09-02 17:46:54 -0700505 build_pattern = _CONFIG.get_config_value(
J. Richard Barnette84895392015-04-30 12:31:01 -0700506 'CROS', 'stable_build_pattern')
507 target_build = build_pattern % (board, target_version)
508 target_build_number = server_site_utils.ParseBuildName(
509 target_build)[3]
beeps5e8c45a2013-12-17 22:05:11 -0800510 ds = dev_server.ImageServer.resolve(self.hostname)
J. Richard Barnette84895392015-04-30 12:31:01 -0700511 url = ds.get_update_url(target_build)
beeps5e8c45a2013-12-17 22:05:11 -0800512
513 updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700514 status, current_build_number = self._check_for_reboot(updater)
515 update_pending = True
beeps5e8c45a2013-12-17 22:05:11 -0800516 if status in autoupdater.UPDATER_PROCESSING_UPDATE:
517 logging.info('servo host %s already processing an update, update '
518 'engine client status=%s', self.hostname, status)
J. Richard Barnette84895392015-04-30 12:31:01 -0700519 elif current_build_number != target_build_number:
beeps5e8c45a2013-12-17 22:05:11 -0800520 logging.info('Using devserver url: %s to trigger update on '
521 'servo host %s, from %s to %s', url, self.hostname,
J. Richard Barnette84895392015-04-30 12:31:01 -0700522 current_build_number, target_build_number)
beeps5e8c45a2013-12-17 22:05:11 -0800523 try:
J. Richard Barnette84895392015-04-30 12:31:01 -0700524 ds.stage_artifacts(target_build,
525 artifacts=['full_payload'])
526 except Exception as e:
527 logging.error('Staging artifacts failed: %s', str(e))
528 logging.error('Abandoning update for this cycle.')
beeps5e8c45a2013-12-17 22:05:11 -0800529 else:
J. Richard Barnette84895392015-04-30 12:31:01 -0700530 try:
Richard Barnette7e53aa02016-05-20 10:49:40 -0700531 # TODO(jrbarnette): This 'touch' is a gross hack
532 # to get us past crbug.com/613603. Once that
533 # bug is resolved, we should remove this code.
534 self.run('touch /home/chronos/.oobe_completed')
J. Richard Barnette84895392015-04-30 12:31:01 -0700535 updater.trigger_update()
536 except autoupdater.RootFSUpdateError as e:
537 trigger_download_status = 'failed with %s' % str(e)
538 autotest_stats.Counter(
539 'servo_host.RootFSUpdateError').increment()
540 else:
541 trigger_download_status = 'passed'
542 logging.info('Triggered download and update %s for %s, '
543 'update engine currently in status %s',
544 trigger_download_status, self.hostname,
545 updater.check_update_status())
beeps5e8c45a2013-12-17 22:05:11 -0800546 else:
547 logging.info('servo host %s does not require an update.',
548 self.hostname)
Richard Barnette3a7697f2016-04-20 11:33:27 -0700549 update_pending = False
550
551 if update_pending and wait_for_update:
552 logging.info('Waiting for servo update to complete.')
553 self.run('update_engine_client --follow', ignore_status=True)
554 status, current_build_number = self._check_for_reboot(updater)
555 if (status != autoupdater.UPDATER_IDLE or
556 current_build_number != target_build_number):
557 logging.error('Update failed; status: %s, '
558 'actual build: %s',
559 status, current_build_number)
560 message = ('Servo host failed to update from %s to %s' %
561 (current_build_number, target_build_number))
562 raise error.AutoservHostError(message)
beeps5e8c45a2013-12-17 22:05:11 -0800563
564
Fang Deng5d518f42013-08-02 14:04:32 -0700565 def verify_software(self):
beeps5e8c45a2013-12-17 22:05:11 -0800566 """Update the servo host and verify it's in a good state.
Fang Deng5d518f42013-08-02 14:04:32 -0700567
568 It overrides the base class function for verify_software.
beeps5e8c45a2013-12-17 22:05:11 -0800569 If an update is available, downloads and applies it. Then verifies:
Fang Deng5d518f42013-08-02 14:04:32 -0700570 1) Whether basic servo command can run successfully.
571 2) Whether USB is in a good state. crbug.com/225932
572
573 @raises ServoHostVerifyFailure if servo host does not pass the checks.
574
575 """
Richard Barnette79d78c42016-05-25 09:31:21 -0700576 # TODO(jrbarnette) Old versions of beaglebone_servo include
577 # the powerd package. In some (not yet understood)
578 # circumstances, powerd on beaglebone will shut down after
579 # attempting to suspend. Current versions of
580 # beaglebone_servo don't have powerd, but until we can purge
581 # the lab of the old images, we need to make sure powerd
582 # isn't running.
583 self.run('stop powerd', ignore_status=True)
584
beeps5e8c45a2013-12-17 22:05:11 -0800585 logging.info('Applying an update to the servo host, if necessary.')
Richard Barnette3a7697f2016-04-20 11:33:27 -0700586 self.update_image(wait_for_update=False)
Dan Shi33412a82014-06-10 15:12:27 -0700587 self._check_servo_config()
Dan Shie5b3c512014-08-21 12:12:09 -0700588 self._check_servod_status()
589
Dan Shi4d478522014-02-14 13:46:32 -0800590 # If servo is already initialized, we don't need to do it again, call
591 # _check_servod should be enough.
592 if self._servo:
593 self._check_servod()
594 else:
595 self._servo = servo.Servo(servo_host=self)
Dan Shie5b3c512014-08-21 12:12:09 -0700596 timeout, _ = retry.timeout(
597 self._servo.initialize_dut,
598 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
599 if timeout:
600 raise ServoHostVerifyFailure('Servo initialize timed out.')
Fang Deng5d518f42013-08-02 14:04:32 -0700601 logging.info('Sanity checks pass on servo host %s', self.hostname)
602
603
604 def _repair_with_sysrq_reboot(self):
605 """Reboot with magic SysRq key."""
606 self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
607 down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
Richard Barnetteab9769f2016-06-01 15:01:44 -0700608 reboot_cmd='echo "b" > /proc/sysrq-trigger',
Fang Deng5d518f42013-08-02 14:04:32 -0700609 fastsync=True)
610 time.sleep(self.REBOOT_DELAY_SECS)
611
612
Fang Dengd4fe7392013-09-20 12:18:21 -0700613 def has_power(self):
614 """Return whether or not the servo host is powered by PoE."""
615 # TODO(fdeng): See crbug.com/302791
616 # For now, assume all servo hosts in the lab have power.
617 return self.is_in_lab()
618
619
620 def power_cycle(self):
621 """Cycle power to this host via PoE if it is a lab device.
622
623 @raises ServoHostRepairFailure if it fails to power cycle the
624 servo host.
625
626 """
627 if self.has_power():
628 try:
629 rpm_client.set_power(self.hostname, 'CYCLE')
630 except (socket.error, xmlrpclib.Error,
631 httplib.BadStatusLine,
632 rpm_client.RemotePowerException) as e:
633 raise ServoHostRepairFailure(
634 'Power cycling %s failed: %s' % (self.hostname, e))
635 else:
636 logging.info('Skipping power cycling, not a lab device.')
637
638
Fang Deng5d518f42013-08-02 14:04:32 -0700639 def _powercycle_to_repair(self):
Fang Dengd4fe7392013-09-20 12:18:21 -0700640 """Power cycle the servo host using PoE.
641
642 @raises ServoHostRepairFailure if it fails to fix the servo host.
Fang Dengf0ea6142013-10-10 21:43:16 -0700643 @raises ServoHostRepairMethodNA if it does not support power.
Fang Dengd4fe7392013-09-20 12:18:21 -0700644
645 """
646 if not self.has_power():
Fang Dengf0ea6142013-10-10 21:43:16 -0700647 raise ServoHostRepairMethodNA('%s does not support power.' %
648 self.hostname)
Fang Dengd4fe7392013-09-20 12:18:21 -0700649 logging.info('Attempting repair via PoE powercycle.')
650 failed_cycles = 0
651 self.power_cycle()
652 while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
653 failed_cycles += 1
654 if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
655 raise ServoHostRepairFailure(
656 'Powercycled host %s %d times; device did not come back'
657 ' online.' % (self.hostname, failed_cycles))
658 self.power_cycle()
659 logging.info('Powercycling was successful after %d failures.',
660 failed_cycles)
661 # Allow some time for servod to get started.
662 time.sleep(self.REBOOT_DELAY_SECS)
Fang Deng5d518f42013-08-02 14:04:32 -0700663
664
J. Richard Barnettec2d99cf2015-11-18 12:46:15 -0800665 def repair(self):
Fang Deng5d518f42013-08-02 14:04:32 -0700666 """Attempt to repair servo host.
667
668 This overrides the base class function for repair.
669 Note if the host is not in Cros Lab, the repair procedure
670 will be skipped.
671
672 @raises ServoHostRepairTotalFailure if all attempts fail.
673
674 """
675 if not self.is_in_lab():
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -0700676 logging.warning('Skip repairing servo host %s: Not a lab device.',
Fang Deng5d518f42013-08-02 14:04:32 -0700677 self.hostname)
678 return
679 logging.info('Attempting to repair servo host %s.', self.hostname)
Dan Shi4d478522014-02-14 13:46:32 -0800680 # Reset the cache to guarantee servo initialization being called later.
681 self._servo = None
Tom Wai-Hong Tam0635dce2016-06-02 02:17:50 +0800682 repair_funcs = [self._repair_with_sysrq_reboot,
683 self._powercycle_to_repair]
Fang Deng5d518f42013-08-02 14:04:32 -0700684 errors = []
685 for repair_func in repair_funcs:
Fang Dengf0ea6142013-10-10 21:43:16 -0700686 counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
Fang Deng5d518f42013-08-02 14:04:32 -0700687 try:
688 repair_func()
689 self.verify()
Gabe Black1e1c41b2015-02-04 23:55:15 -0800690 autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
Fang Deng5d518f42013-08-02 14:04:32 -0700691 return
Fang Dengf0ea6142013-10-10 21:43:16 -0700692 except ServoHostRepairMethodNA as e:
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -0700693 logging.warning('Repair method NA: %s', e)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800694 autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
Fang Dengf0ea6142013-10-10 21:43:16 -0700695 errors.append(str(e))
Fang Deng5d518f42013-08-02 14:04:32 -0700696 except Exception as e:
Ilja H. Friedel04be2bd2014-05-07 21:29:59 -0700697 logging.warning('Failed to repair servo: %s', e)
Gabe Black1e1c41b2015-02-04 23:55:15 -0800698 autotest_stats.Counter(counter_prefix + 'FAILED').increment()
Fang Deng5d518f42013-08-02 14:04:32 -0700699 errors.append(str(e))
Gabe Black1e1c41b2015-02-04 23:55:15 -0800700 autotest_stats.Counter('servo_host_repair.Full_Repair_Failed'). \
701 increment()
Fang Deng5d518f42013-08-02 14:04:32 -0700702 raise ServoHostRepairTotalFailure(
703 'All attempts at repairing the servo failed:\n%s' %
704 '\n'.join(errors))
705
706
def get_servo(self):
    """Return the servo.Servo object currently cached on this host.

    The value is whatever is stored in `self._servo`; no
    initialization is performed here.

    @return: the cached servo.Servo object.
    """
    cached_servo = self._servo
    return cached_servo
713
714
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    The suffix '-servo' is appended to the first dot-separated label
    of the name; any remaining labels are kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() yields ('', '', '') pieces as needed, so a name
    # without any dot simply gets the suffix appended.
    first_label, separator, remainder = dut_hostname.partition('.')
    return first_label + '-servo' + separator + remainder
726
727
def servo_host_is_up(servo_hostname):
    """
    Report whether a servo host answers ICMP ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one ping reply was received, False
            otherwise.
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to
    # decide that an SSH ping has timed out.  Specifically, that timeout
    # happens when our servo DNS name resolves, but there is no host at
    # that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    ping_result = ping_runner.PingRunner().ping(config)
    return ping_result.received > 0
746
747
def _get_standard_servo_args(dut_host):
    """
    Look up the servo parameters associated with a given DUT.

    The servo host and port are read from the `_afe_host.attributes`
    field of the provided `dut_host`.  If no servo host attribute is
    present, a servo hostname derived from the DUT hostname is tried
    instead (not on moblab, and not when the DUT hostname is an IP
    address); it is only used if it is in the lab zone.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of a `servo_args` dict with the servo host and an
            optional port (or `None` if no servo was found), plus an
            `is_in_lab` flag indicating whether this is in the CrOS
            test lab, or some different environment.
    """
    # Inside a container the moblab setting comes from the SSP config;
    # only in that case can `is_ssp_moblab` be true.
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
        is_ssp_moblab = False

    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        # A loopback servo address on an SSP moblab is replaced by the
        # configured `host_container_ip` value.
        if is_ssp_moblab and servo_host in ('localhost', '127.0.0.1'):
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_args[SERVO_PORT_ATTR] = attrs[SERVO_PORT_ATTR]
        # The lab-zone lookup is skipped entirely on moblab.
        if is_moblab:
            return servo_args, False
        return servo_args, utils.host_is_in_lab_zone(servo_host)

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    if is_moblab or dnsname_mangler.is_ip_address(dut_host.hostname):
        return None, False
    default_servo_host = make_servo_hostname(dut_host.hostname)
    is_in_lab = utils.host_is_in_lab_zone(default_servo_host)
    if is_in_lab:
        return {SERVO_HOST_ATTR: default_servo_host}, is_in_lab
    return None, is_in_lab
793
794
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      skip_host_up_check=False):
    """
    Build a ServoHost object for the servo attached to `dut`, if any.

    Three outcomes are possible:
      * No servo parameters for the DUT can be determined, so no
        servo host is created.
      * Parameters can be determined and the servo host is created.
      * Parameters are known, but the servo host is still not
        created.

    Servo parameters are a host name and port number.  They come from
    the following sources, highest priority first:
      * Servo attributes found on the `dut` parameter.
      * A servo hostname derived from the DUT hostname, if a DNS entry
        for it exists in the CrOS lab network; the default port is
        used.
      * The `servo_args` dict passed in by the caller, when nothing
        else is found.

    Given servo parameters, the servo host object is created when any
    of these holds:
      * The `servo_args` parameter was not `None`.
      * The `skip_host_up_check` parameter is true.
      * The `try_lab_servo` parameter is true, and the specified
        servo host responds to ping.

    The servo host is checked via `verify()` at creation time.
    Failures are ignored unless the `servo_args` parameter was not
    `None`.  In that case:
      * If the servo appears to be in the test lab, an attempt will
        be made to repair it.
      * If the error isn't repaired, the exception from `verify()`
        will be passed back to the caller.

    @param dut                  An instance of `Host` from which to take
                                servo parameters (if available).
    @param servo_args           A dictionary with servo parameters to
                                use if they can't be found from `dut`.
                                If this argument is supplied, unrepaired
                                exceptions from `verify()` will be
                                passed back to the caller.
    @param try_lab_servo        If not true, servo host creation will be
                                skipped unless otherwise required by the
                                caller.
    @param skip_host_up_check   If true, do not check whether the host
                                responds to ping.

    @returns: A ServoHost object or None. See comments above.

    """
    # Supplying `servo_args` at all marks the servo as required.
    required_by_test = servo_args is not None
    is_in_lab = False
    if try_lab_servo or required_by_test:
        standard_args, is_in_lab = _get_standard_servo_args(dut)
        # Parameters found via the DUT win over the caller's dict.
        if standard_args is not None:
            servo_args = standard_args
    if servo_args is None:
        return None

    # Only ping the servo host when nothing forces creation.
    must_create = required_by_test or skip_host_up_check
    if not must_create and not servo_host_is_up(servo_args[SERVO_HOST_ATTR]):
        return None
    return ServoHost(required_by_test=required_by_test,
                     is_in_lab=is_in_lab, **servo_args)