blob: fca324321590694cafa524100ade592eae7a365c [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import time
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080020from autotest_lib.client.common_lib import global_config
Dan Shi0942b1d2015-03-31 11:07:00 -070021from autotest_lib.client.common_lib import lsbrelease_utils
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib.cros import autoupdater
23from autotest_lib.client.common_lib.cros import dev_server
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Gabe Black1e1c41b2015-02-04 23:55:15 -080025from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000027from autotest_lib.client.cros import constants as client_constants
beeps5e8c45a2013-12-17 22:05:11 -080028from autotest_lib.server import site_utils as server_site_utils
Cheng-Yi Chiang22612862015-08-20 20:39:57 +080029from autotest_lib.server.cros import dnsname_mangler
Fang Deng5d518f42013-08-02 14:04:32 -070030from autotest_lib.server.cros.servo import servo
Simran Basi0739d682015-02-25 16:22:56 -080031from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.server.hosts import ssh_host
Fang Dengd4fe7392013-09-20 12:18:21 -070033from autotest_lib.site_utils.rpm_control_system import rpm_client
Fang Deng5d518f42013-08-02 14:04:32 -070034
35
# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'

# Shared handle on the global autotest configuration.
_CONFIG = global_config.global_config
# When True, ServoHost connects to servod through its rpc_server_tracker
# instead of opening a direct XML-RPC connection to the servo host.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)
Simran Basi0739d682015-02-25 16:22:56 -080044
class ServoHostException(error.AutoservError):
    """This is the base class for exceptions raised by ServoHost."""
48
49
class ServoHostVerifyFailure(ServoHostException):
    """Raised when servo verification fails."""
53
54
class ServoHostRepairFailure(ServoHostException):
    """Raised when a repair method fails to repair a servo host."""
58
59
class ServoHostRepairMethodNA(ServoHostException):
    """Raised when a repair method is not applicable."""
63
64
class ServoHostRepairTotalFailure(ServoHostException):
    """Raised if all attempts to repair a servo host fail."""
68
69
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Timeout for getting the value of 'pwr_button'.
    PWR_BUTTON_CMD_TIMEOUT_SECS = 15
    # Timeout for rebooting servo host.
    REBOOT_TIMEOUT_SECS = 90
    # Passed as `down_timeout` when rebooting via SysRq.
    HOST_DOWN_TIMEOUT_SECS = 60
    # Delay after rebooting for servod to become fully functional.
    REBOOT_DELAY_SECS = 20
    # Servod process name.
    SERVOD_PROCESS = 'servod'
    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30
    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    _MAX_POWER_CYCLE_ATTEMPTS = 3
    _timer = autotest_stats.Timer('servo_host')


    def _initialize(self, servo_host='localhost', servo_port=9999,
                    required_by_test=True, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        After the attributes are set up, a connection to servod is
        created and `verify()` is called.  If that fails:
          * when the servo is not required by the test, the failure is
            logged and otherwise ignored;
          * when the servo is required and the host is not in the lab,
            the exception is re-raised;
          * when the servo is required and the host is in the lab,
            `repair()` is attempted.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param required_by_test: True if servo is required by test.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        if is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab
        self._is_localhost = (self.hostname == 'localhost')

        # Commands on the servo host must be run by the superuser. Our account
        # on Beaglebone is root, but locally we might be running as a
        # different user. If so - `sudo ' will have to be added to the
        # commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False
        # Create a cache of Servo object. This must be called at the end of
        # _initialize to make sure all attributes are set.
        self._servo = None
        self.required_by_test = required_by_test
        try:
            if ENABLE_SSH_TUNNEL_FOR_SERVO:
                self._servod_server = self.rpc_server_tracker.xmlrpc_connect(
                        None, servo_port,
                        ready_test_name=self.SERVO_READY_METHOD,
                        timeout_seconds=60)
            else:
                remote = 'http://%s:%s' % (self.hostname, servo_port)
                self._servod_server = xmlrpclib.ServerProxy(remote)
            self.verify()
        except Exception as e:
            if not required_by_test:
                # Deliberately best-effort: a broken servo must not fail a
                # test that does not need it, but leave a trace in the log.
                logging.info('Ignoring servo host %s setup failure; servo is '
                             'not required by the test: %s',
                             self.hostname, e)
            elif not self.is_in_lab():
                raise
            else:
                self.repair()


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        @returns: The servod proxy object created in `_initialize` (an
                  xmlrpclib.ServerProxy unless the ssh tunnel is enabled).

        """
        return self._servod_server


    def get_wait_up_processes(self):
        """Get the list of local processes to wait for in wait_up.

        Override get_wait_up_processes in
        autotest_lib.client.common_lib.hosts.base_classes.Host.
        Wait for servod process to go up. Called by base class when
        rebooting the device.

        @returns: A list holding the servod process name.

        """
        return [self.SERVOD_PROCESS]


    def _is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n %s' % command
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of Host.run() expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    @_timer.decorate
    def _check_servod(self):
        """A sanity check of the servod state.

        Asks servod for the value of 'pwr_button' with a bounded timeout.

        @raises ServoHostVerifyFailure if the request times out or the
                RPC fails.

        """
        msg_prefix = 'Servod error: %s'
        try:
            timeout, _ = retry.timeout(
                    self._servod_server.get, args=('pwr_button', ),
                    timeout_sec=self.PWR_BUTTON_CMD_TIMEOUT_SECS)
        except (socket.error, xmlrpclib.Error, httplib.BadStatusLine) as e:
            raise ServoHostVerifyFailure(msg_prefix % e)
        if timeout:
            raise ServoHostVerifyFailure(msg_prefix % 'Request timed out.')


    def _check_servo_config(self):
        """Check if config file exists for servod.

        If servod config file does not exist, there is no need to verify if
        servo is working. The servo could be attached to a board not supported
        yet.

        The check is skipped for localhost, and a failure is ignored when
        the host is not (known to be) running chromeos.

        @raises ServoHostVerifyFailure if /var/lib/servod/config does not exist.

        """
        if self._is_localhost:
            return
        try:
            self.run('test -f /var/lib/servod/config')
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if not self._is_cros_host():
                logging.info('Ignoring servo config check failure, either %s '
                             'is not running chromeos or we cannot find enough '
                             'information about the host.', self.hostname)
                return
            raise ServoHostVerifyFailure(
                    'Servo config file check failed for %s: %s' %
                    (self.hostname, e))


    def _check_servod_status(self):
        """Check if servod process is running.

        If servod is not running, there is no need to verify if servo is
        working. Check the process before making any servod call can avoid
        long timeout that eventually fail any servod call.
        If the servo host is set to localhost, failure of servod status check
        will be ignored, as servo call may use ssh tunnel.

        @raises ServoHostVerifyFailure if servod process does not exist.

        """
        try:
            output = self.run('pgrep %s' % self.SERVOD_PROCESS).stdout
            # split() without arguments tolerates any stray whitespace
            # around the one-PID-per-line output.
            pids = [str(int(s)) for s in output.split()]
            logging.info('servod is running, PID=%s', ','.join(pids))
        except (error.AutoservRunError, error.AutoservSSHTimeout) as e:
            if self._is_localhost:
                logging.info('Ignoring servod status check failure. servo host '
                             'is set to localhost, servo call may use ssh '
                             'tunnel to go through.')
            else:
                raise ServoHostVerifyFailure(
                        'Servod status check failed for %s: %s' %
                        (self.hostname, e))


    def get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    def _check_for_reboot(self, updater):
        """
        Reboot this servo host if an upgrade is waiting.

        If the host has successfully downloaded and finalized a new
        build, reboot.

        @param updater: a ChromiumOSUpdater instance for checking
            whether reboot is needed.
        @return Return a (status, build) tuple reflecting the
            update_engine status and current build of the host
            at the end of the call.
        @raises AutoservHostError if the host fails to shut down or
            fails to come back from the reboot.
        """
        current_build_number = self.get_release_version()
        status = updater.check_update_status()
        if status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('Rebooting beaglebone host %s from build %s',
                         self.hostname, current_build_number)
            # Record the boot id before requesting the reboot, so that
            # the query cannot race with the host going down.
            old_boot_id = self.get_boot_id()
            # Tell the reboot() call not to wait for completion.
            # Otherwise, the call will log reboot failure if servo does
            # not come back.  The logged reboot failure will lead to
            # test job failure.  If the test does not require servo, we
            # don't want servo failure to fail the test with error:
            # `Host did not return from reboot` in status.log.
            #
            # NOTE: this must be a plain string; a trailing comma here
            # would silently turn it into a 1-tuple.
            reboot_cmd = 'sleep 1 ; reboot & sleep 10; reboot -f'
            self.reboot(reboot_cmd=reboot_cmd, fastsync=True, wait=False)

            # We told the reboot() call not to wait, but we need to wait
            # for the reboot before we continue.  Alas.  The code from
            # here below is basically a copy of Host.wait_for_restart(),
            # with the logging bits ripped out, so that they can't cause
            # the failure logging problem described above.
            #
            # The black stain that this has left on my soul can never be
            # erased.
            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                                  old_boot_id=old_boot_id):
                raise error.AutoservHostError(
                        'servo host %s failed to shut down.' %
                        self.hostname)
            if self.wait_up(timeout=120):
                current_build_number = self.get_release_version()
                status = updater.check_update_status()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)
        return status, current_build_number


    @_timer.decorate
    def update_image(self, wait_for_update=False):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update.  The
            call only waits for that update when `wait_for_update` is
            true.

        @param wait_for_update If an update needs to be applied and
            this is true, then don't return until the update is
            downloaded and finalized, and the host rebooted.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.
        @raises AutoservHostError: If `wait_for_update` is true and the
            host does not end up idle on the target build.

        """
        # servod could be running in a Ubuntu workstation.
        if not self._is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        # Work out which build the servo host is supposed to be running.
        board = _CONFIG.get_config_value('CROS', 'servo_board')
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        target_version = afe.run('get_stable_version', board=board)
        build_pattern = _CONFIG.get_config_value(
                'CROS', 'stable_build_pattern')
        target_build = build_pattern % (board, target_version)
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        ds = dev_server.ImageServer.resolve(self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        status, current_build_number = self._check_for_reboot(updater)
        update_pending = True
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    # TODO(jrbarnette):  This 'touch' is a gross hack
                    # to get us past crbug.com/613603.  Once that
                    # bug is resolved, we should remove this code.
                    self.run('touch /home/chronos/.oobe_completed')
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    autotest_stats.Counter(
                            'servo_host.RootFSUpdateError').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            update_pending = False

        if update_pending and wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)
            status, current_build_number = self._check_for_reboot(updater)
            if (status != autoupdater.UPDATER_IDLE or
                    current_build_number != target_build_number):
                logging.error('Update failed; status: %s, '
                              'actual build: %s',
                              status, current_build_number)
                message = ('Servo host failed to update from %s to %s' %
                           (current_build_number, target_build_number))
                raise error.AutoservHostError(message)


    def verify_software(self):
        """Update the servo host and verify it's in a good state.

        It overrides the base class function for verify_software.
        If an update is available, downloads and applies it. Then verifies:
            1) Whether basic servo command can run successfully.
            2) Whether USB is in a good state. crbug.com/225932

        @raises ServoHostVerifyFailure if servo host does not pass the checks.

        """
        # TODO(jrbarnette) Old versions of beaglebone_servo include
        # the powerd package.  In some (not yet understood)
        # circumstances, powerd on beaglebone will shut down after
        # attempting to suspend.  Current versions of
        # beaglebone_servo don't have powerd, but until we can purge
        # the lab of the old images, we need to make sure powerd
        # isn't running.
        self.run('stop powerd', ignore_status=True)

        logging.info('Applying an update to the servo host, if necessary.')
        self.update_image(wait_for_update=False)
        self._check_servo_config()
        self._check_servod_status()

        # If servo is already initialized, we don't need to do it again, call
        # _check_servod should be enough.
        if self._servo:
            self._check_servod()
        else:
            self._servo = servo.Servo(servo_host=self)
            timeout, _ = retry.timeout(
                    self._servo.initialize_dut,
                    timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
            if timeout:
                raise ServoHostVerifyFailure('Servo initialize timed out.')
        logging.info('Sanity checks pass on servo host %s', self.hostname)


    def _repair_with_sysrq_reboot(self):
        """Reboot with magic SysRq key."""
        self.reboot(timeout=self.REBOOT_TIMEOUT_SECS,
                    down_timeout=self.HOST_DOWN_TIMEOUT_SECS,
                    reboot_cmd='echo "b" > /proc/sysrq-trigger',
                    fastsync=True)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises ServoHostRepairFailure if it fails to power cycle the
                servo host.

        """
        if not self.has_power():
            logging.info('Skipping power cycling, not a lab device.')
            return
        try:
            rpm_client.set_power(self.hostname, 'CYCLE')
        except (socket.error, xmlrpclib.Error,
                httplib.BadStatusLine,
                rpm_client.RemotePowerException) as e:
            raise ServoHostRepairFailure(
                    'Power cycling %s failed: %s' % (self.hostname, e))


    def _powercycle_to_repair(self):
        """Power cycle the servo host using PoE.

        The host is power cycled again each time it fails to come up,
        up to _MAX_POWER_CYCLE_ATTEMPTS failed waits in total.

        @raises ServoHostRepairFailure if it fails to fix the servo host.
        @raises ServoHostRepairMethodNA if it does not support power.

        """
        if not self.has_power():
            raise ServoHostRepairMethodNA('%s does not support power.' %
                                          self.hostname)
        logging.info('Attempting repair via PoE powercycle.')
        failed_cycles = 0
        self.power_cycle()
        while not self.wait_up(timeout=self.REBOOT_TIMEOUT_SECS):
            failed_cycles += 1
            if failed_cycles >= self._MAX_POWER_CYCLE_ATTEMPTS:
                raise ServoHostRepairFailure(
                        'Powercycled host %s %d times; device did not come back'
                        ' online.' % (self.hostname, failed_cycles))
            self.power_cycle()
        logging.info('Powercycling was successful after %d failures.',
                     failed_cycles)
        # Allow some time for servod to get started.
        time.sleep(self.REBOOT_DELAY_SECS)


    def repair(self):
        """Attempt to repair servo host.

        This overrides the base class function for repair.
        Note if the host is not in Cros Lab, the repair procedure
        will be skipped.

        Each repair method is tried in turn and followed by `verify()`;
        the first one that passes verification ends the repair.

        @raises ServoHostRepairTotalFailure if all attempts fail.

        """
        if not self.is_in_lab():
            logging.warning('Skip repairing servo host %s: Not a lab device.',
                            self.hostname)
            return
        logging.info('Attempting to repair servo host %s.', self.hostname)
        # Reset the cache to guarantee servo initialization being called later.
        self._servo = None
        repair_funcs = [self._repair_with_sysrq_reboot,
                        self._powercycle_to_repair]
        errors = []
        for repair_func in repair_funcs:
            counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
            try:
                repair_func()
                self.verify()
                autotest_stats.Counter(counter_prefix + 'SUCCEEDED').increment()
                return
            except ServoHostRepairMethodNA as e:
                logging.warning('Repair method NA: %s', e)
                autotest_stats.Counter(counter_prefix + 'RepairNA').increment()
                errors.append(str(e))
            except Exception as e:
                logging.warning('Failed to repair servo: %s', e)
                autotest_stats.Counter(counter_prefix + 'FAILED').increment()
                errors.append(str(e))
        autotest_stats.Counter(
                'servo_host_repair.Full_Repair_Failed').increment()
        raise ServoHostRepairTotalFailure(
                'All attempts at repairing the servo failed:\n%s' %
                '\n'.join(errors))


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if the servo has not been
                 initialized (or was reset by `repair()`).
        """
        return self._servo
694
695
def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The servo name is formed by appending '-servo' to the first
    dot-separated component of the DUT name; any domain part is kept
    unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    # partition() leaves `dot` and `domain` empty for an unqualified name.
    host, dot, domain = dut_hostname.partition('.')
    return host + '-servo' + dot + domain
707
708
def servo_host_is_up(servo_hostname):
    """
    Report whether a servo host answers ping.

    @param servo_hostname: hostname of the servo host to probe.

    @return True if the ping result reports more than zero packets
            received, False otherwise.
    """
    # The servo proxy initialization code performs an SSH ping of its
    # own early on, so strictly speaking this check is redundant.
    # However, a failing ping finishes within a couple of seconds,
    # whereas an SSH ping needs 60 seconds to decide it has timed out.
    # That slow timeout is what happens when the servo DNS name
    # resolves but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    result = ping_runner.PingRunner().ping(config)
    return result.received > 0
727
728
def _get_standard_servo_args(dut_host):
    """
    Look up the servo parameters recorded for a DUT.

    The servo host (and, optionally, port) is read from the
    `host_attributes` field of `dut_host`.  If no servo host attribute
    is present, we are not on moblab, and the DUT hostname is not an
    IP address, the servo hostname derived from the DUT hostname is
    used instead, provided that name is in the lab zone.

    @param dut_host  Instance of `Host` on which to find the servo
                     attributes.
    @return A tuple `(servo_args, is_in_lab)`.  `servo_args` is a dict
            with the servo host and an optional port, or `None` if no
            servo could be determined.  `is_in_lab` indicates whether
            this is in the CrOS test lab, or some different
            environment.
    """
    # Inside a container the moblab setting comes from the SSP config
    # section; otherwise ask the local system.
    in_container = utils.is_in_container()
    if in_container:
        on_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
    else:
        on_moblab = utils.is_moblab()
    on_ssp_moblab = on_moblab if in_container else False

    host_attrs = dut_host.host_attributes
    if host_attrs and SERVO_HOST_ATTR in host_attrs:
        hostname = host_attrs[SERVO_HOST_ATTR]
        # A loopback servo address recorded for an SSP moblab DUT is
        # replaced by the configured host container IP.
        if on_ssp_moblab and hostname in ('localhost', '127.0.0.1'):
            hostname = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        args = {SERVO_HOST_ATTR: hostname}
        if SERVO_PORT_ATTR in host_attrs:
            args[SERVO_PORT_ATTR] = host_attrs[SERVO_PORT_ATTR]
        # A moblab servo is never treated as being in the lab, so the
        # lab zone lookup is skipped entirely in that case.
        if on_moblab:
            return args, False
        return args, utils.host_is_in_lab_zone(hostname)

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    if on_moblab or dnsname_mangler.is_ip_address(dut_host.hostname):
        return None, False
    hostname = make_servo_hostname(dut_host.hostname)
    in_lab = utils.host_is_in_lab_zone(hostname)
    if in_lab:
        return {SERVO_HOST_ATTR: hostname}, in_lab
    return None, in_lab
774
775
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      skip_host_up_check=False):
    """
    Build a ServoHost object for the servo attached to a DUT, if any.

    Three outcomes are possible:
      * Servo parameters for the DUT cannot be determined; no servo
        host is created.
      * Parameters can be determined and the servo host should be
        created.
      * Parameters are known, but the servo host should still not be
        created.

    Servo parameters are a host name and port number.  They come from
    the following sources, highest priority first:
      * Servo attributes from the `dut` parameter; these override
        every other source.
      * A DNS entry for the servo, based on the DUT hostname, that
        exists in the CrOS lab network; that hostname is used with
        the default port.
      * The `servo_args` dict passed in from the caller, used when
        nothing else is found.

    Once parameters are known, the servo host object is created if
    any of these holds:
      * The `servo_args` parameter was not `None`.
      * The `skip_host_up_check` parameter is true.
      * The `try_lab_servo` parameter is true, and the specified
        servo host responds to ping.

    The servo host is checked via `verify()` when it is created.
    Failures are ignored unless the `servo_args` parameter was not
    `None`.  In that case:
      * If the servo appears to be in the test lab, a repair is
        attempted.
      * If the error isn't repaired, the exception from `verify()` is
        passed back to the caller.

    @param dut                 An instance of `Host` from which to
                               take servo parameters (if available).
    @param servo_args          A dictionary with servo parameters to
                               use if they can't be found from `dut`.
                               If this argument is supplied,
                               unrepaired exceptions from `verify()`
                               will be passed back to the caller.
    @param try_lab_servo       If not true, servo host creation will
                               be skipped unless otherwise required
                               by the caller.
    @param skip_host_up_check  If true, do not check whether the host
                               responds to ping.

    @returns: A ServoHost object or None.  See comments above.

    """
    # A caller that supplies servo_args is declaring the servo
    # mandatory for its test.
    caller_requires_servo = servo_args is not None
    in_lab = False
    if try_lab_servo or caller_requires_servo:
        dut_servo_args, in_lab = _get_standard_servo_args(dut)
        # Parameters found for the DUT win over the caller's.
        if dut_servo_args is not None:
            servo_args = dut_servo_args
    if servo_args is None:
        return None

    # The ping check is only consulted when nothing else already
    # forces creation of the servo host.
    must_create = caller_requires_servo or skip_host_up_check
    if not must_create and not servo_host_is_up(
            servo_args[SERVO_HOST_ATTR]):
        return None
    return ServoHost(required_by_test=caller_requires_servo,
                     is_in_lab=in_lab, **servo_args)
Dan Shibbb0cb62014-03-24 17:50:57 -0700845 return None